当前位置:网站首页>stylecloud ,wordcloud 库学习及使用例子
stylecloud ,wordcloud 库学习及使用例子
2022-04-23 11:15:00 【还债大湿兄】
安装 Python
安装库 stylecloud
pip install numpy
pip install pillow
pip install matplotlib
pip install wordcloud
pip install stylecloud
因为要修改源代码stylecloud,所以要查看路径
# 对已安装的库再执行一次 pip install(例如连续执行两次 pip install numpy),pip 会提示该库已安装并显示其安装路径。要查看 python 的安装位置,可以直接执行 where python
因为stylecloud 默认不能设置模板及背景色透明,根据网上才哥(详细见【推荐收藏】介绍2种Python绘制词云的手法,你会偷偷pick谁呢?)的可做如下修改:
我的库安装路径:C:\Users\Administrator\AppData\Local\Programs\Python\Python38\Lib\site-packages\stylecloud
修改如下:
from icon_font_to_png.icon_font import IconFont
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import csv
import os
from PIL import Image
from matplotlib.colors import to_rgb
import numpy as np
import fire
from shutil import rmtree
from pkg_resources import resource_filename
from typing import List, Union
STATIC_PATH = resource_filename(__name__, "static")
def file_to_text(file_path: str):
    """
    Read the stylecloud input from a file.

    A path that does not end in ``.csv`` is read as UTF-8 text and returned
    as a single string. A ``.csv`` path is parsed with ``csv.reader``: the
    first non-blank row is treated as a header (and discarded), and its
    width selects the result type:

    * one column  -> ``str``: the first cell of every row, each followed
      by a newline;
    * two columns -> ``dict``: first cell mapped to ``float(second cell)``.

    Blank rows are skipped. A CSV with no non-blank rows returns ``""``.

    :param file_path: Path of the text or CSV file to read.
    :raises AssertionError: if the CSV header has more than two columns.
    """
    if not file_path.endswith(".csv"):
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    with open(file_path, "r", encoding="utf-8") as f:
        reader = csv.reader(f)
        # csv.reader yields [] for blank lines; skip them so a leading or
        # embedded blank line neither becomes the header nor raises
        # IndexError further down.
        header = next((row for row in reader if row), None)
        if header is None:
            return ""
        if len(header) > 2:
            # Raised explicitly (rather than via `assert`) so the check
            # survives `python -O`; the exception type is unchanged.
            raise AssertionError("The input CSV has too many columns.")
        rows = [row for row in reader if row]

    # Single-column CSV: bulk text, one entry per line.
    if len(header) == 1:
        return "".join(row[0] + "\n" for row in rows)
    # Two-column CSV: word -> weight.
    return {row[0]: float(row[1]) for row in rows}
def gen_fa_mask(
    icon_name: str = "fas fa-grin",
    size: int = 512,
    icon_dir: str = ".temp",
    pro_icon_path: str = None,
    pro_css_path: str = None,
):
    """
    Export a Font Awesome icon as ``icon.png`` inside `icon_dir`.

    :param icon_name: FA prefix and icon name separated by a space,
        e.g. ``"fas fa-grin"``.
    :param size: Export size; if a tuple is given, its smaller element is used.
    :param icon_dir: Directory the PNG is exported to.
    :param pro_icon_path: Font file to use instead of the bundled one.
    :param pro_css_path: CSS file to use instead of the bundled one.
    """
    # Bundled font file for each supported FA prefix.
    prefix_to_font = {
        "fas": "fa-solid-900.ttf",
        "far": "fa-regular-400.ttf",
        "fab": "fa-brands-400.ttf",
    }

    name_parts = icon_name.split(" ")
    icon_prefix = name_parts[0]
    icon_name_raw = name_parts[1]

    if pro_css_path:
        css_path = pro_css_path
    else:
        css_path = os.path.join(STATIC_PATH, "fontawesome.min.css")

    # The prefix table is only consulted when no custom font is supplied.
    if pro_icon_path:
        ttf_path = pro_icon_path
    else:
        ttf_path = os.path.join(STATIC_PATH, prefix_to_font[icon_prefix])

    icon = IconFont(css_file=css_path, ttf_file=ttf_path)

    # A (length, width) pair collapses to its smaller side.
    if isinstance(size, tuple):
        size = min(size)

    # Drop the font's common prefix from the icon name before exporting.
    prefix_len = len(icon.common_prefix)
    icon.export_icon(
        icon=icon_name_raw[prefix_len:],
        size=size,
        filename="icon.png",
        export_dir=icon_dir,
    )
def gen_palette(palette: str):
    """Look up a palette object inside the `palettable` package.

    `palette` is a dotted path such as ``"cartocolors.qualitative.Bold_5"``:
    everything before the last dot is the submodule under ``palettable``,
    and the last component is the attribute fetched from it.
    """
    module_path, _, palette_name = palette.rpartition(".")
    # A non-empty `fromlist` makes __import__ return the leaf submodule
    # rather than the top-level package (https://stackoverflow.com/a/6677505).
    module = __import__(
        "palettable.{}".format(module_path),
        fromlist=[palette_name],
    )
    return getattr(module, palette_name)
def gen_mask_array(icon_dir: str, invert_mask: bool, size: int):
    """Load ``icon.png`` from `icon_dir` and return it as a uint8 RGBA array.

    The icon is flattened onto an opaque white background, centred on a
    white canvas of `size` (an int means a square canvas), and optionally
    bit-inverted.
    """
    icon = Image.open(os.path.join(icon_dir, "icon.png"))
    canvas_size = (size, size) if isinstance(size, int) else size

    opaque_white = (255, 255, 255, 255)

    # Paste the icon using itself as the paste mask
    # (https://stackoverflow.com/a/2563883).
    flattened = Image.new("RGBA", icon.size, opaque_white)
    flattened.paste(icon, icon)

    canvas = Image.new("RGBA", canvas_size, opaque_white)
    icon_w, icon_h = icon.size
    canvas_w, canvas_h = canvas.size
    top_left = ((canvas_w - icon_w) // 2, (canvas_h - icon_h) // 2)
    canvas.paste(flattened, top_left)

    mask_array = np.array(canvas, dtype="uint8")
    return np.invert(mask_array) if invert_mask else mask_array
def gen_gradient_mask(
    size: int,
    palette: str,
    icon_dir: str = ".temp",
    gradient_dir: str = "horizontal",
    invert_mask: bool = False,
):
    """Generates a gradient color mask from a specified palette.

    :param size: Mask size: an int (square) or a ``(width, height)`` tuple,
        matching what `gen_mask_array` accepts.
    :param palette: Dotted palettable path, resolved by `gen_palette`.
    :param icon_dir: Directory containing the exported ``icon.png``.
    :param gradient_dir: ``"vertical"`` runs the gradient top to bottom;
        any other value runs it left to right.
    :param invert_mask: Whether to invert the icon mask.
    :return: ``(ImageColorGenerator, uint8 mask array)``.
    """
    mask_array = np.float32(gen_mask_array(icon_dir, invert_mask, size))

    # Normalise `size` the same way gen_mask_array does, so a non-square
    # (width, height) tuple works here too instead of failing in
    # np.linspace / np.tile.
    width, height = (size, size) if isinstance(size, int) else size

    vertical = gradient_dir == "vertical"
    # The gradient is sampled along one axis and repeated along the other.
    steps, repeats = (height, width) if vertical else (width, height)

    palette_func = gen_palette(palette)
    gradient = palette_func.mpl_colormap(np.linspace(0.0, 1.0, steps))

    # matplotlib color maps are from range of (0, 1). Convert to RGB.
    gradient *= 255.0

    # Add new axis and repeat gradient across it: (repeats, steps, 4).
    gradient = np.tile(gradient, (repeats, 1, 1))

    # If vertical, swap the first two axes so the result is again
    # (height, width, 4), matching the mask array.
    if vertical:
        gradient = np.transpose(gradient, (1, 0, 2))

    # Replace every channel value that differs from opaque white with the
    # gradient value at the same position.
    white = (255.0, 255.0, 255.0, 255.0)
    nonwhite = mask_array != white
    mask_array[nonwhite] = gradient[nonwhite]

    image_colors = ImageColorGenerator(mask_array)
    return image_colors, np.uint8(mask_array)
def color_to_rgb(color):
    """Return `color` as a tuple of 0-255 channel values.

    A tuple is assumed to be in that form already and is returned untouched;
    anything else is decoded with matplotlib's ``to_rgb``.
    """
    if not isinstance(color, tuple):
        # to_rgb() yields channels in the 0-1 range; scale and truncate.
        color = tuple(int(channel * 255) for channel in to_rgb(color))
    return color
def gen_stylecloud(
    bg: Union[str, np.ndarray] = '',  # custom mask (added); empty means "use the icon mask"
    scale: int = 5,
    text: str = None,
    file_path: str = None,
    size: int = 512,
    mode='RGBA',
    icon_name: str = "fas fa-flag",
    palette: str = "cartocolors.qualitative.Bold_5",
    colors: Union[str, List[str]] = None,
    background_color: str = "white",
    max_font_size: int = 200,
    max_words: int = 2000,
    stopwords: bool = True,
    custom_stopwords: Union[List[str], set] = STOPWORDS,
    add_stopwords: bool = False,
    icon_dir: str = ".temp",
    output_name: str = "stylecloud.png",
    gradient: str = None,
    font_path: str = os.path.join(STATIC_PATH, "Staatliches-Regular.ttf"),
    random_state: int = None,
    collocations: bool = True,
    invert_mask: bool = False,
    pro_icon_path: str = None,
    pro_css_path: str = None,
):
    """Generates a stylecloud!

    :param bg: Custom mask array passed to WordCloud as ``mask``. When empty
        (the default ``''``) or None, the generated icon mask is used.
    :param scale: Passed through to ``WordCloud(scale=...)``.
    :param text: Input text. Best used if calling the function directly.
    :param file_path: File path of the input text/CSV. Best used on the CLI.
    :param size: Size (length and width in pixels) of the stylecloud. If a
        tuple is given, the icon is rendered at its smaller element.
    :param mode: Passed through to ``WordCloud(mode=...)``.
    :param icon_name: Icon Name for the stylecloud shape. (e.g. 'fas fa-grin')
    :param palette: Color palette (via palettable)
    :param colors: Custom color(s) for text (name or hex). Overrides palette.
    :param background_color: Background color (name or hex).
    :param max_font_size: Maximum font size in the stylecloud.
    :param max_words: Maximum number of words to include in the stylecloud.
    :param stopwords: Boolean to filter out common stopwords.
    :param custom_stopwords: list of custom stopwords.
    :param add_stopwords: Whether to use custom_stopwords to add to default
    :param icon_dir: Temp directory to store the icon mask image.
    :param output_name: Output file name of the stylecloud.
    :param gradient: Direction of gradient. (if not None, will use gradient)
    :param font_path: Path to .ttf file for font to use in stylecloud.
    :param random_state: Controls random state of words and colors.
    :param collocations: Whether to include collocations (bigrams) of two words.
    :param invert_mask: Whether to invert the icon mask.
    :param pro_icon_path: Path to Font Awesome Pro .ttf file if using FA Pro.
    :param pro_css_path: Path to Font Awesome Pro .css file if using FA Pro.
    :raises AssertionError: if neither `text` nor `file_path` is given.
    """
    if not any([text, file_path]):
        # Raised explicitly (rather than via `assert`) so the check survives
        # `python -O`; the exception type is unchanged.
        raise AssertionError("Either text or file_path must be specified.")

    if file_path:
        text = file_to_text(file_path)

    gen_fa_mask(icon_name, size, icon_dir, pro_icon_path, pro_css_path)

    if gradient and colors is None:
        pal_colors, mask_array = gen_gradient_mask(
            size, palette, icon_dir, gradient, invert_mask
        )
    else:  # Color each word randomly from the palette
        mask_array = gen_mask_array(icon_dir, invert_mask, size)
        if colors:
            # if specifying a single color string
            if isinstance(colors, str):
                colors = [colors]

            # iterate through each color to ensure correct RGB format.
            # see matplotlib docs on how colors are decoded:
            # https://matplotlib.org/3.1.1/api/colors_api.html
            colors = [color_to_rgb(color) for color in colors]
        else:
            palette_func = gen_palette(palette)
            colors = palette_func.colors

        # NOTE(review): this draws from numpy's global RNG and ignores the
        # `random_state` it is handed, so word colors are not reproducible
        # even when `random_state` is set. Left as-is to preserve output.
        def pal_colors(word, font_size, position, orientation, random_state, **kwargs):
            rand_color = np.random.randint(0, len(colors))
            return tuple(colors[rand_color])

    # Build a private stopword set. The previous in-place
    # `custom_stopwords.extend(STOPWORDS)` raised AttributeError for the
    # default (a set has no .extend) and mutated the caller's list otherwise.
    if custom_stopwords is None:
        custom_stopwords = STOPWORDS  # same fallback WordCloud itself applies
    stopword_set = set(custom_stopwords)
    if add_stopwords:
        stopword_set |= STOPWORDS

    # cleanup icon folder
    rmtree(icon_dir)

    wc = WordCloud(
        background_color=background_color,
        font_path=font_path,
        max_words=max_words,
        # Use the caller's custom mask when one was supplied.
        mask=mask_array if bg is None or len(bg) == 0 else bg,
        # WordCloud treats stopwords=None as "use the built-in list", so an
        # empty set is required to actually disable filtering.
        stopwords=stopword_set if stopwords else set(),
        max_font_size=max_font_size,
        random_state=random_state,
        collocations=collocations,
        mode=mode,
        scale=scale,
    )

    # generate word cloud
    if isinstance(text, str):
        wc.generate_from_text(text)
    else:  # i.e. a dict of word:value from a CSV
        if stopwords:  # manually remove stopwords since otherwise ignored
            text = {k: v for k, v in text.items() if k not in stopword_set}
        wc.generate_from_frequencies(text)

    wc.recolor(color_func=pal_colors, random_state=random_state)
    wc.to_file(output_name)
def stylecloud_cli(**kwargs):
    """Entrypoint for the stylecloud CLI.

    Hands `gen_stylecloud` to ``fire.Fire``. The ``**kwargs`` accepted here
    are not used by this function.
    """
    fire.Fire(gen_stylecloud)
以上stylecloud.py文件主要修改的是:
gen_stylecloud()参数修改如下:
bg: str='', # 新增自定义蒙版参数
scale: int = 5,
mode='RGBA',
WordCloud()中参数修改如下:
mask=mask_array if len(bg)==0 else bg, # 快看,这个是 蒙版设置参数
mode = mode,
scale = scale,
怎么执行呢?我是初学者,记录如下:假设我写的 py 文件在 I:\MyQTGuitest\python\ 目录下
# 执行 python 脚本:可以直接在文件浏览器里打开 I:\MyQTGuitest\python\111.py
# 或者在命令行里先输入 I: 切换盘符,再 cd I:\MyQTGuitest\python,然后执行 111.py
stylecloud 例子:
import jieba
import codecs
import numpy as np
from PIL import Image
# Running `pip install <package>` again for an installed package prints where it is installed (e.g. run `pip install stylecloud` twice). To locate the interpreter itself, run `where python`.
# C:\Users\Administrator\AppData\Local\Programs\Python\Python38\Lib\site-packages\stylecloud
# How to patch stylecloud for a custom mask: https://mp.weixin.qq.com/s/TciqGM7cqGf5bspej07kzg
# https://ai.baidu.com/tech/body/seg
# For icon names such as the `fa-plane` in 'fas fa-plane', see the page below; right-click an icon there and copy its name: https://www.runoob.com/font-awesome/fontawesome-reference.html
# NOTE(review): Python only honours an encoding declaration on line 1 or 2 of a file, so the line below has no effect here.
# -*- coding: utf-8 -*-
import jieba
# Load the custom mask image as an array; it is passed to gen_stylecloud(bg=...) in cloud().
bg=np.array(Image.open("women.jpg"))
from stylecloud import gen_stylecloud
def cloud(file_name):
    """Segment the text in `file_name` with jieba and render it as a stylecloud.

    The output is written to ``t44.png`` and uses the module-level `bg`
    array as the custom mask.
    """
    with open(file_name, 'r', encoding='utf8') as source:
        raw_text = source.read()

    # Join the segmented words with spaces.
    segmented = " ".join(jieba.cut(raw_text))

    # Render the Chinese word cloud.
    gen_stylecloud(
        bg=bg,
        text=segmented,
        size=(768, 512),  # 512
        # A Chinese-capable font is required, otherwise the output is malformed.
        font_path='I:/MyQTGuitest/lyss.ttf',
        background_color=None,
        palette='cartocolors.diverging.Fall_4',
        icon_name='fas fa-plane',
        output_name='t44.png',
        gradient=None,  # gradient direction
    )
if __name__ == "__main__":
    # Path of the input text file that cloud() reads and segments.
    file_name = 'C:/word.txt'
    cloud(file_name)
word.txt文件内容:
暮色西去,留下了淡淡的忧伤。半帘落霞里,醉晕星辰、月隐山巅。一柳落背的残影孤独着滑进了山弯,悄悄的沉眠。烟尘朦胧中,岁月匆匆流逝,追逐着一轮远远的梦想。半盏残垣、两堵城池,萤火中幽香夜色。
剪一段经年,写一篇过往,流年里风月缠绵、轻浅如禅。心阅一卷时光,悠然里安静赏花、无语草香。红尘幽幽,行走在尘世的的路上,花间写诗,月下饮酒。一盏琉璃的浮华,云淡风轻中失落。一半的静谧、一半的安祥,遗留着许多错过的情爱、伤过的心灵。生命中太多的笔墨,纸砚一池安然。一扇时光中,悠香着一缕淡淡的芬芳,相伴着一份柔柔的恬淡。一本书写满了人生、一段情温暖了爱恋、一首诗记忆了曾经、一杯茶起落了缘份。握着最美的遇见,写下珍惜。携着最暖的心语,写意温馨。
烟雨下的春绿,伏笔了盛夏的明朗。海棠落月的幽静,缠绵着银色的月光,星光执笔的萧瑟和鸣里,轻笛梵音。花月中秦时幛幔垂垂,风尘里汉疆硝烟漫漫。
岁月咫尺、天涯相望。尘世匆匆,遥遥万里。留一片春风,栽一山桃源,幽静的心灵里,刻下了多少人的风花雪月、悲欢离合。恍恍惚惚的秋梦里,又有多少人发出了长长的的叹息。年年过四季,岁岁是轮回。
人生自己温一壶茶,烫也罢,凉也罢,苦也可,甜也可,自己慢慢品慢慢尝。
回想一世,不记得了誓言,也忘记了当初的承诺。什么时候两鬓斑白、皱纹沟壑,什么时候去过桃源,什么时候带雨梨花,都成了过往一笺无字的纸。
指尖拨动着岁月的年轮,把曾经都碾成了粉末。洒在大海里、洒在山谷中、种在大树下、种在田园里。明年又会长出春绿、长出秋黄、长出冬雪。再执笔展开红尘,一首岁月一首诗,一韵平仄一片情。
烟雨尘世,岁月匆匆……
字体文件可以自己从网上随便下载一个,或者在系统字体目录里找一个,都可以
wordcloud例子:
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import jieba.analyse
# Read the source text; the with-block closes the file as soon as it is read
# (the bare open(...).read() left the handle open).
with open('C:/word.txt', encoding='utf8') as f:
    text = f.read()
# Extract the top 200 keywords together with their weights.
freq = jieba.analyse.extract_tags(text, topK=200, withWeight=True)
print(freq[:20])
# Convert the (word, weight) pairs into a word -> weight mapping.
freq = {word: weight for word, weight in freq}
# Colormap names can be looked up in the official matplotlib colormaps documentation.
# Choose a word's color from its attributes (position and font size).
def color_func(word, font_size, position, random_state=None, **kwargs):
    """Return an ``(r, g, b)`` tuple derived from a word's placement and size.

    Each channel is drawn from a dark band (0-40) or a brighter band
    (100-150):

    * red   - dark when ``position[0] < 500``;
    * green - dark when ``position[1] < 500``;
    * blue  - dark when ``font_size < 50``.

    `word` may be passed positionally or by keyword; ``WordCloud.recolor``
    passes it as ``word=...``, which the previous positional-only signature
    rejected. When `random_state` is None a fresh ``random.Random`` is used.
    Extra keyword arguments are accepted and ignored.
    """
    if random_state is None:
        from random import Random

        random_state = Random()

    # position[0] (labelled as the y coordinate by the original author)
    if position[0] < 500:
        r = random_state.randint(0, 40)  # same as random.randint(0, 40)
    else:
        r = random_state.randint(100, 150)

    # position[1] (labelled as the x coordinate by the original author)
    if position[1] < 500:
        g = random_state.randint(0, 40)
    else:
        g = random_state.randint(100, 150)

    # font size
    if font_size < 50:
        b = random_state.randint(0, 40)
    else:
        b = random_state.randint(100, 150)

    # Return an RGB color tuple.
    return (r, g, b)
# `scale` is the first argument: the larger it is, the higher the resolution
# of the output image and the crisper the text. Try 64 if your machine is
# fast enough.
# Build the word cloud object.
mask = np.array(Image.open("women.jpg"))  # Result199.png
wc = WordCloud(
    scale=1,
    mask=mask,
    font_path='I:/MyQTGuitest/lyss.ttf',
    mode='RGBA',
    background_color=None,
)
wc.generate_from_frequencies(freq)
# Alternative: pass the color_func defined above to choose the colors yourself.
#wc = WordCloud(mask=mask, font_path='I:/MyQTGuitest/lyss.ttf', mode='RGBA', background_color=None,
#               color_func=color_func,
#               ).generate_from_frequencies(freq)

# Take the word colors from the mask image.
image_colors = ImageColorGenerator(mask)
wc.recolor(color_func=image_colors)

# Display the word cloud.
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()

# Save it to a file.
wc.to_file('wordcloud9.png')
版权声明
本文为[还债大湿兄]所创,转载请带上原文链接,感谢
https://blog.csdn.net/qq_30377315/article/details/121625322
边栏推荐
- More reliable model art than deep learning
- An interesting interview question
- web三大组件(Servlet,Filter,Listener)
- 《Neo4j权威指南》简介,求伯君、周鸿袆、胡晓峰、周涛等大咖隆重推荐
- 初探 Lambda Powertools TypeScript
- 数据库管理软件SQLPro for SQLite for Mac 2022.30
- Detailed explanation of integer data type tinyint in MySQL
- MySQL索引优化之分页探索详细介绍
- @valid,@Validated 的学习笔记
- Learning go language 0x02: understanding slice
猜你喜欢
More reliable model art than deep learning
GO接口使用
第六站神京门户-------手机号码的转换
ConstraintLayout布局
Upgrade the functions available for cpolar intranet penetration
关于JUC三大常用辅助类
Visual solutions to common problems (VIII) mathematical formulas
Database management software sqlpro for SQLite for Mac 2022.30
After the MySQL router is reinstalled, it reconnects to the cluster for boot - a problem that has been configured in this host before
VM set up static virtual machine
随机推荐
Detailed explanation of integer data type tinyint in MySQL
Excel · VBA array bubble sorting function
小程序 支付
Get things technology network optimization - CDN resource request Optimization Practice
MBA-day5数学-应用题-工程问题
SVN的使用:
妊娠箱和分娩箱的区别
PlatoFarm推出正式版游戏经济模型的特点分析
Oracle连通性测试小工具
Usage of rename in cygwin
MySQL8.0升级的踩坑历险记
Mba-day5 Mathematics - application problems - engineering problems
colab
MBA - day5 mathématiques - Questions d'application - Questions d'ingénierie
Mysql8.0安装指南
Constraintlayout layout
Learning go language 0x02: understanding slice
About the three commonly used auxiliary classes of JUC
年度最尴尬的社死瞬间,是Siri给的
Which company is good for opening futures accounts? Who can recommend several safe and reliable futures companies?