当前位置:网站首页>stylecloud ,wordcloud 库学习及使用例子
stylecloud ,wordcloud 库学习及使用例子
2022-04-23 11:15:00 【还债大湿兄】
安装python
安装库 stylecloud
pip install numpy
pip install pillow
pip install matplotlib
pip install wordcloud
pip install stylecloud
因为要修改源代码stylecloud,所以要查看路径
#安装两次就可以看到安装路径了,如执行两次 pip install numpy。查看python则可以直接where python
因为stylecloud 默认不能设置自定义蒙版(模板),也不能把背景色设为透明,参考网上才哥的文章(详细见【推荐收藏】介绍2种Python绘制词云的手法,你会偷偷pick谁呢?),可做如下修改:
我的库安装路径:C:\Users\Administrator\AppData\Local\Programs\Python\Python38\Lib\site-packages\stylecloud
修改如下:
from icon_font_to_png.icon_font import IconFont
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import csv
import os
from PIL import Image
from matplotlib.colors import to_rgb
import numpy as np
import fire
from shutil import rmtree
from pkg_resources import resource_filename
from typing import List, Union
STATIC_PATH = resource_filename(__name__, "static")
def file_to_text(file_path: str):
    """
    Read the stylecloud input from a text file or a CSV file.

    A path that does not end in ".csv" is read whole and returned as a
    string. A ".csv" file is parsed with its first row treated as a header
    (and therefore skipped):

    * one column  -> the remaining rows are joined into a single string,
      one row per line;
    * two columns -> the remaining rows become a ``{word: weight}`` dict
      with the weights converted to ``float``.

    Blank rows in a CSV are ignored.

    :param file_path: Path of the input file; it is read as UTF-8.
    :return: ``str`` for plain text and single-column CSVs, ``dict`` for
        two-column CSVs.
    :raises ValueError: If the CSV has no header row (empty file or blank
        first line).
    :raises AssertionError: If the CSV header has more than two columns.
    """
    if not file_path.endswith(".csv"):
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    # newline="" is what the csv module expects, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if not header:
            raise ValueError("The input CSV is empty.")
        # Raised explicitly (same type as the former ``assert``) so the
        # check is not stripped when Python runs with -O.
        if len(header) > 2:
            raise AssertionError("The input CSV has too many columns.")

        # Single-column CSV: read as bulk text.
        if len(header) == 1:
            return "".join(row[0] + "\n" for row in reader if row)

        # Two-column CSV: read as words/weights.
        return {row[0]: float(row[1]) for row in reader if row}
def gen_fa_mask(
    icon_name: str = "fas fa-grin",
    size: int = 512,
    icon_dir: str = ".temp",
    pro_icon_path: str = None,
    pro_css_path: str = None,
):
    """
    Export a Font Awesome icon as ``icon.png`` into ``icon_dir``.

    :param icon_name: Font Awesome prefix and icon class separated by a
        single space, e.g. ``"fas fa-grin"``.
    :param size: Icon size passed to the exporter; for a tuple the smaller
        of its values is used.
    :param icon_dir: Export directory handed to ``IconFont.export_icon``.
    :param pro_icon_path: Optional .ttf path used instead of the bundled
        font file for the prefix.
    :param pro_css_path: Optional .css path used instead of the bundled
        ``fontawesome.min.css``.
    """
    # Bundled font file for each Font Awesome prefix.
    prefix_to_font = {
        "fas": "fa-solid-900.ttf",
        "far": "fa-regular-400.ttf",
        "fab": "fa-brands-400.ttf",
    }

    name_parts = icon_name.split(" ")
    style_prefix = name_parts[0]
    glyph_class = name_parts[1]

    if pro_css_path:
        css_file = pro_css_path
    else:
        css_file = os.path.join(STATIC_PATH, "fontawesome.min.css")

    # The bundled font is only looked up when no custom font is supplied.
    if pro_icon_path:
        ttf_file = pro_icon_path
    else:
        ttf_file = os.path.join(STATIC_PATH, prefix_to_font[style_prefix])

    icon_font = IconFont(css_file=css_file, ttf_file=ttf_file)

    # When a (length, width) pair is given, the icon takes the smaller value.
    side = min(size) if isinstance(size, tuple) else size

    # Drop the font's common prefix from the icon class before exporting.
    prefix_length = len(icon_font.common_prefix)
    icon_font.export_icon(
        icon=glyph_class[prefix_length:],
        size=side,
        filename="icon.png",
        export_dir=icon_dir,
    )
def gen_palette(palette: str):
    """Return the ``palettable`` palette object named by ``palette``.

    :param palette: Dotted path below the ``palettable`` package whose last
        component is the palette name, e.g.
        ``"cartocolors.qualitative.Bold_5"``.
    :return: The attribute with that name looked up on the imported
        ``palettable.<module path>`` module.
    :raises ImportError: If the module part cannot be imported.
    :raises AttributeError: If the module has no attribute with the
        palette name.
    """
    # Local import so this function does not depend on the file's import list.
    import importlib

    # Everything before the last dot is the module path; the rest is the name.
    module_path, _, palette_name = palette.rpartition(".")
    palette_module = importlib.import_module("palettable.{}".format(module_path))
    return getattr(palette_module, palette_name)
def gen_mask_array(icon_dir: str, invert_mask: bool, size: int):
    """Build a uint8 numpy array of the icon centred on a white canvas.

    :param icon_dir: Directory containing the exported ``icon.png``.
    :param invert_mask: When true, the array is passed through
        ``np.invert`` before being returned.
    :param size: Canvas size; an int is expanded to ``(size, size)``,
        anything else is handed to ``Image.new`` as given.
    :return: The canvas as a ``uint8`` numpy array.
    """
    icon_image = Image.open(os.path.join(icon_dir, "icon.png"))
    canvas_size = (size, size) if isinstance(size, int) else size

    opaque_white = (255, 255, 255, 255)

    # Flatten the icon onto a white background, using the icon itself as
    # the paste mask. See https://stackoverflow.com/a/2563883
    flattened = Image.new("RGBA", icon_image.size, opaque_white)
    flattened.paste(icon_image, icon_image)

    # Centre the flattened icon on a white canvas of the requested size.
    canvas = Image.new("RGBA", canvas_size, opaque_white)
    canvas_w, canvas_h = canvas.size
    icon_w, icon_h = icon_image.size
    top_left = ((canvas_w - icon_w) // 2, (canvas_h - icon_h) // 2)
    canvas.paste(flattened, top_left)

    pixels = np.array(canvas, dtype="uint8")
    return np.invert(pixels) if invert_mask else pixels
def gen_gradient_mask(
    size: int,
    palette: str,
    icon_dir: str = ".temp",
    gradient_dir: str = "horizontal",
    invert_mask: bool = False,
):
    """Generates a gradient color mask from a specified palette.

    :param size: Mask size: an int for a square mask, or a
        ``(width, height)`` pair (the same forms ``gen_mask_array`` takes).
    :param palette: Palette path resolved through ``gen_palette``.
    :param icon_dir: Directory holding the exported ``icon.png``.
    :param gradient_dir: ``"vertical"`` runs the gradient top to bottom;
        any other value runs it left to right.
    :param invert_mask: Forwarded to ``gen_mask_array``.
    :return: ``(image_colors, mask)`` where ``image_colors`` is an
        ``ImageColorGenerator`` built from the recoloured mask and ``mask``
        is that mask converted to ``uint8``.
    """
    mask_array = gen_mask_array(icon_dir, invert_mask, size)
    mask_array = np.float32(mask_array)

    # The mask built from a (width, height) canvas is indexed
    # [row, column, channel], i.e. (height, width, 4).
    if isinstance(size, int):
        width, height = size, size
    else:
        width, height = size

    # The gradient varies along one axis and is repeated along the other.
    vertical = gradient_dir == "vertical"
    steps, repeats = (height, width) if vertical else (width, height)

    palette_func = gen_palette(palette)
    gradient = palette_func.mpl_colormap(np.linspace(0.0, 1.0, steps))

    # matplotlib color maps are from range of (0, 1). Convert to RGB.
    gradient *= 255.0

    # Add new axis and repeat gradient across it.
    gradient = np.tile(gradient, (repeats, 1, 1))

    # if vertical, transpose the gradient so it matches the mask's layout.
    if vertical:
        gradient = np.transpose(gradient, (1, 0, 2))

    # Turn any nonwhite values on the icon into the gradient colors.
    white = (255.0, 255.0, 255.0, 255.0)
    non_white = mask_array != white
    mask_array[non_white] = gradient[non_white]

    image_colors = ImageColorGenerator(mask_array)
    return image_colors, np.uint8(mask_array)
def color_to_rgb(color):
    """Return ``color`` as an RGB tuple on the 0-255 scale.

    A tuple is assumed to be in that form already and is returned
    unchanged. Any other value is decoded with matplotlib's ``to_rgb``
    (which yields channels in 0-1); each channel is multiplied by 255 and
    truncated to ``int``.
    """
    if isinstance(color, tuple):
        return color

    unit_channels = to_rgb(color)
    return tuple(int(channel * 255) for channel in unit_channels)
def gen_stylecloud(
    bg: str = '',  # custom mask parameter added by this patch
    scale: int = 5,
    text: str = None,
    file_path: str = None,
    size: int = 512,
    mode='RGBA',
    icon_name: str = "fas fa-flag",
    palette: str = "cartocolors.qualitative.Bold_5",
    colors: Union[str, List[str]] = None,
    background_color: str = "white",
    max_font_size: int = 200,
    max_words: int = 2000,
    stopwords: bool = True,
    custom_stopwords: Union[List[str], set] = STOPWORDS,
    add_stopwords: bool = False,
    icon_dir: str = ".temp",
    output_name: str = "stylecloud.png",
    gradient: str = None,
    font_path: str = os.path.join(STATIC_PATH, "Staatliches-Regular.ttf"),
    random_state: int = None,
    collocations: bool = True,
    invert_mask: bool = False,
    pro_icon_path: str = None,
    pro_css_path: str = None,
):
    """Generates a stylecloud!

    :param bg: Custom mask handed to ``WordCloud(mask=...)`` instead of the
        generated icon mask (e.g. a numpy array of an image). When left
        empty (or ``None``) the icon mask is used.
    :param scale: Forwarded to ``WordCloud(scale=...)``.
    :param text: Input text. Best used if calling the function directly.
    :param file_path: File path of the input text/CSV. Best used on the CLI.
    :param size: Size (length and width in pixels) of the stylecloud.
    :param mode: Forwarded to ``WordCloud(mode=...)``.
    :param icon_name: Icon Name for the stylecloud shape. (e.g. 'fas fa-grin')
    :param palette: Color palette (via palettable)
    :param colors: Custom color(s) for text (name or hex). Overrides palette.
    :param background_color: Background color (name or hex).
    :param max_font_size: Maximum font size in the stylecloud.
    :param max_words: Maximum number of words to include in the stylecloud.
    :param stopwords: Boolean to filter out common stopwords.
    :param custom_stopwords: list of custom stopwords.
    :param add_stopwords: Whether to use custom_stopwords to add to default
    :param icon_dir: Temp directory to store the icon mask image.
    :param output_name: Output file name of the stylecloud.
    :param gradient: Direction of gradient. (if not None, will use gradient)
    :param font_path: Path to .ttf file for font to use in stylecloud.
    :param random_state: Controls random state of words and colors.
    :param collocations: Whether to include collocations (bigrams) of two words.
    :param invert_mask: Whether to invert the icon mask.
    :param pro_icon_path: Path to Font Awesome Pro .ttf file if using FA Pro.
    :param pro_css_path: Path to Font Awesome Pro .css file if using FA Pro.
    :raises AssertionError: If neither ``text`` nor ``file_path`` is given.
    """
    # Raised explicitly (same type as the former ``assert``) so the check
    # is not stripped when Python runs with -O.
    if not any([text, file_path]):
        raise AssertionError("Either text or file_path must be specified.")

    if file_path:
        text = file_to_text(file_path)

    # The icon is always exported, even when a custom ``bg`` mask is given,
    # because the colour function below may be derived from it.
    gen_fa_mask(icon_name, size, icon_dir, pro_icon_path, pro_css_path)

    if gradient and colors is None:
        pal_colors, mask_array = gen_gradient_mask(
            size, palette, icon_dir, gradient, invert_mask
        )
    else:  # Color each word randomly from the palette
        mask_array = gen_mask_array(icon_dir, invert_mask, size)
        if colors:
            # if specifying a single color string
            if isinstance(colors, str):
                colors = [colors]

            # iterate through each color to ensure correct RGB format.
            # see matplotlib docs on how colors are decoded:
            # https://matplotlib.org/3.1.1/api/colors_api.html
            colors = [color_to_rgb(color) for color in colors]
        else:
            palette_func = gen_palette(palette)
            colors = palette_func.colors

        def pal_colors(word, font_size, position, orientation, random_state, **kwargs):
            rand_color = np.random.randint(0, len(colors))
            return tuple(colors[rand_color])

    if add_stopwords:
        # Build a new set rather than calling ``.extend``: the default
        # ``custom_stopwords`` is a set (no ``.extend``), and a caller's
        # list must not be modified in place.
        custom_stopwords = set(custom_stopwords) | set(STOPWORDS)

    # cleanup icon folder
    rmtree(icon_dir)

    # An empty/None ``bg`` means "no custom mask": use the icon mask.
    use_icon_mask = bg is None or len(bg) == 0

    wc = WordCloud(
        background_color=background_color,
        font_path=font_path,
        max_words=max_words,
        mask=mask_array if use_icon_mask else bg,  # custom mask, if supplied
        stopwords=custom_stopwords if stopwords else None,
        max_font_size=max_font_size,
        random_state=random_state,
        collocations=collocations,
        mode=mode,
        scale=scale,
    )

    # generate word cloud
    if isinstance(text, str):
        wc.generate_from_text(text)
    else:  # i.e. a dict of word:value from a CSV
        if stopwords:  # manually remove stopwords since otherwise ignored
            text = {k: v for k, v in text.items() if k not in custom_stopwords}
        wc.generate_from_frequencies(text)

    wc.recolor(color_func=pal_colors, random_state=random_state)
    wc.to_file(output_name)
def stylecloud_cli(**kwargs):
    """Command-line entry point: expose ``gen_stylecloud`` through Fire.

    ``kwargs`` is accepted but not used by this function.
    """
    fire.Fire(component=gen_stylecloud)
以上stylecloud.py文件主要修改的是:
gen_stylecloud()参数修改如下:
bg: str='', # 新增自定义蒙版参数
scale: int = 5,
mode='RGBA',
WordCloud()中参数修改如下:
mask=mask_array if len(bg)==0 else bg, # 快看,这个是 蒙版设置参数
mode = mode,
scale = scale,
怎么执行呢?我是初学者,记录如下:假设我写的 py 文件在 I:\MyQTGuitest\python\ 目录下
#执行 python 脚本:可以直接在文件浏览器里打开 I:\MyQTGuitest\python\111.py
#或者在命令行里依次输入:I:(切换盘符)、cd I:\MyQTGuitest\python、111.py
stylecloud 例子:
# -*- coding: utf-8 -*-
import codecs

import jieba
import numpy as np
from PIL import Image
from stylecloud import gen_stylecloud

# Tip: running `pip install stylecloud` a second time prints where the
# package is installed; `where python` shows the interpreter location.
# C:\Users\Administrator\AppData\Local\Programs\Python\Python38\Lib\site-packages\stylecloud
# How to patch stylecloud for a custom mask: https://mp.weixin.qq.com/s/TciqGM7cqGf5bspej07kzg
# https://ai.baidu.com/tech/body/seg
# For icon names such as the `fa-plane` in "fas fa-plane", see the page
# below; right-click an icon there and copy its name:
# https://www.runoob.com/font-awesome/fontawesome-reference.html

# Custom mask image, loaded once and used by the word-cloud call below.
bg = np.array(Image.open("women.jpg"))
def cloud(file_name):
    """Segment a UTF-8 text file with jieba and render it as a stylecloud.

    The segmented words are joined with spaces and passed to
    ``gen_stylecloud`` together with the module-level ``bg`` mask; the
    output file name given to it is ``t44.png``.

    :param file_name: Path of the text file to read.
    """
    with open(file_name, 'r', encoding='utf8') as source:
        raw_text = source.read()

    # Separate the segmented words with spaces.
    segmented = " ".join(jieba.cut(raw_text))

    # Build the Chinese word cloud. Per the article author, a font with
    # Chinese glyphs must be supplied, otherwise the output is wrong.
    gen_stylecloud(
        bg=bg,
        text=segmented,
        size=(768, 512),  # the author also tried 512
        font_path='I:/MyQTGuitest/lyss.ttf',
        background_color=None,
        palette='cartocolors.diverging.Fall_4',
        icon_name='fas fa-plane',
        output_name='t44.png',
        gradient=None,  # gradient direction
    )


if __name__ == "__main__":
    file_name = 'C:/word.txt'
    cloud(file_name)
word.txt文件内容:
暮色西去,留下了淡淡的忧伤。半帘落霞里,醉晕星辰、月隐山巅。一柳落背的残影孤独着滑进了山弯,悄悄的沉眠。烟尘朦胧中,岁月匆匆流逝,追逐着一轮远远的梦想。半盏残垣、两堵城池,萤火中幽香夜色。
剪一段经年,写一篇过往,流年里风月缠绵、轻浅如禅。心阅一卷时光,悠然里安静赏花、无语草香。红尘幽幽,行走在尘世的的路上,花间写诗,月下饮酒。一盏琉璃的浮华,云淡风轻中失落。一半的静谧、一半的安祥,遗留着许多错过的情爱、伤过的心灵。生命中太多的笔墨,纸砚一池安然。一扇时光中,悠香着一缕淡淡的芬芳,相伴着一份柔柔的恬淡。一本书写满了人生、一段情温暖了爱恋、一首诗记忆了曾经、一杯茶起落了缘份。握着最美的遇见,写下珍惜。携着最暖的心语,写意温馨。
烟雨下的春绿,伏笔了盛夏的明朗。海棠落月的幽静,缠绵着银色的月光,星光执笔的萧瑟和鸣里,轻笛梵音。花月中秦时幛幔垂垂,风尘里汉疆硝烟漫漫。
岁月咫尺、天涯相望。尘世匆匆,遥遥万里。留一片春风,栽一山桃源,幽静的心灵里,刻下了多少人的风花雪月、悲欢离合。恍恍惚惚的秋梦里,又有多少人发出了长长的的叹息。年年过四季,岁岁是轮回。
人生自己温一壶茶,烫也罢,凉也罢,苦也可,甜也可,自己慢慢品慢慢尝。
回想一世,不记得了誓言,也忘记了当初的承诺。什么时候两鬓斑白、皱纹沟壑,什么时候去过桃源,什么时候带雨梨花,都成了过往一笺无字的纸。
指尖拨动着岁月的年轮,把曾经都碾成了粉末。洒在大海里、洒在山谷中、种在大树下、种在田园里。明年又会长出春绿、长出秋黄、长出冬雪。再执笔展开红尘,一首岁月一首诗,一韵平仄一片情。
烟雨尘世,岁月匆匆……
字体可以自己随便从网上下载,也可以在系统字体目录里找,都可以。
wordcloud例子:
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import jieba.analyse
# Read the source text; the context manager closes the file once it is read.
with open('C:/word.txt', encoding='utf8') as text_file:
    text = text_file.read()
# Extract the top 200 keywords together with their weights.
freq = jieba.analyse.extract_tags(text, topK=200, withWeight=True)
print(freq[:20])
# Turn the (word, weight) pairs into a {word: weight} mapping.
freq = {word: weight for word, weight in freq}
# Colormap names can be looked up in the official matplotlib colormaps docs.
# Choose each word's colour from its layout attributes.
def color_func(word, /, font_size, position, random_state, **kwargs):
    """Pick an RGB colour from a word's position and font size.

    Each channel is drawn with ``random_state.randint`` from 0-40 when its
    driving value is below the threshold, otherwise from 100-150:

    * red   <- ``position[0] < 500``
    * green <- ``position[1] < 500``
    * blue  <- ``font_size < 50``

    :param word: The word being coloured (not used).
    :param font_size: Font size of the word.
    :param position: Indexable pair giving the word's position.
    :param random_state: Object providing ``randint(a, b)``.
    :return: ``(r, g, b)`` tuple.
    """
    def draw_channel(use_dark_range):
        # Dark band for small values, brighter band otherwise.
        low, high = (0, 40) if use_dark_range else (100, 150)
        return random_state.randint(low, high)

    # The draws happen in r, g, b order.
    red = draw_channel(position[0] < 500)
    green = draw_channel(position[1] < 500)
    blue = draw_channel(font_size < 50)
    return (red, green, blue)
# Author's note on ``scale``: the larger the value, the higher the resolution
# of the generated image and the sharper the text. Try 64 if your computer is
# fast enough.

# Load the mask image (the author also used Result199.png).
mask = np.array(Image.open("women.jpg"))

# Build the word cloud from the keyword weights.
wc = WordCloud(
    scale=1,
    mask=mask,
    font_path='I:/MyQTGuitest/lyss.ttf',
    mode='RGBA',
    background_color=None,
)
wc.generate_from_frequencies(freq)

# Alternative: colour the words with the custom function defined earlier by
# passing it to the constructor:
# wc = WordCloud(mask=mask, font_path='I:/MyQTGuitest/lyss.ttf', mode='RGBA', background_color=None,
#                color_func=color_func,
#                ).generate_from_frequencies(freq)

# Take the word colours from the mask image.
image_colors = ImageColorGenerator(mask)
wc.recolor(color_func=image_colors)

# Show the word cloud.
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()

# Save it to a file.
wc.to_file('wordcloud9.png')
版权声明
本文为[还债大湿兄]所创,转载请带上原文链接,感谢
https://blog.csdn.net/qq_30377315/article/details/121625322
边栏推荐
- 升级cpolar内网穿透能获得的功能
- Google Earth engine (GEE) - scale up the original image (taking Hainan as an example)
- MySQL partition table can be classified by month
- Visual common drawing (III) area map
- 妊娠箱和分娩箱的区别
- Typora operation skill description (I) md
- Common parameters of ffmpeg command line
- Solutions to common problems in visualization (IX) background color
- Solve the problem of "suncertpathbuilderexception: unable to find valid certification path to requested target"
- An interesting interview question
猜你喜欢
Visualization Road (10) detailed explanation of segmentation canvas function
CUMCM 2021-B:乙醇偶合制備C4烯烴(2)
PDMS soft lithography process
《Neo4j权威指南》简介,求伯君、周鸿袆、胡晓峰、周涛等大咖隆重推荐
Cygwin 中的 rename 用法
Mysql8.0安装指南
Solutions to common problems in visualization (VIII) solutions to problems in shared drawing area
Cumcm 2021 - B: préparation d'oléfines C4 par couplage éthanol (2)
MySQL Router重装后重新连接集群进行引导出现的——此主机中之前已配置过的问题
MIT: label every pixel in the world with unsupervised! Humans: no more 800 hours for an hour of video
随机推荐
How to quickly query 10 million pieces of data in MySQL
2022爱分析· 工业互联网厂商全景报告
采用百度飞桨EasyDL完成指定目标识别
期货开户哪个公司好?安全靠谱的期货公司谁能推荐几家?
Implementation of partition table of existing data table by MySQL
闹钟场景识别
Source insight 4.0 FAQs
Mysql8.0安装指南
使用 PHP PDO ODBC 示例的 Microsoft Access 数据库
Mysql8. 0 installation guide
26. Delete duplicates in ordered array
Solutions to common problems in visualization (VIII) solutions to problems in shared drawing area
Mysql中有关Datetime和Timestamp的使用总结
学习 Go 语言 0x05:《Go 语言之旅》中映射(map)的练习题代码
学习 Go 语言 0x07:《Go 语言之旅》中 Stringer 练习题代码
mysql分表之后如何平滑上线详解
R-Drop:更强大的Dropout正则方法
redis优化系列(二)Redis主从原理、主从常用配置
学习 Go 语言 0x02:对切片 Slice 的理解
MIT:用无监督为世界上每个像素都打上标签!人类:再也不用为1小时视频花800个小时了