# Page-scrape residue ("Spaces: Sleeping"); not part of the program.
import csv
import logging
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import numpy as np
import requests
from PIL import Image
from wordcloud import WordCloud
_logger = logging.getLogger(__name__)

# Substrings that are blanked out (replaced by a single space) in every title
# and content before tokenizing: line breaks, HTML <br> tags, a few very
# common function words, and the literal '113' (presumably a year marker that
# would otherwise dominate the cloud -- TODO confirm).
_REPLACE_LIST = (
    '\n',
    '\r',
    '<br />',
    '<br>',
    '的',
    '及',
    '於',
    '並',
    '113',
)


def _clean(text):
    """Return *text* with every entry of ``_REPLACE_LIST`` replaced by a space."""
    for word in _REPLACE_LIST:
        text = text.replace(word, ' ')
    return text


def _load_news(csv_path):
    """Return (title, content) pairs read from the 'Title'/'Content' CSV columns."""
    with open(csv_path, 'r', encoding='utf-8-sig') as f:
        return [(row['Title'], row['Content']) for row in csv.DictReader(f)]


def _build_corpus(news):
    """Join cleaned titles and contents into one text, one blank line per item."""
    chunks = []
    for index, (title, content) in enumerate(news):
        # csv.DictReader fills columns missing from a short row with None.
        # Such rows are skipped (best effort), but no longer silently.
        if title is None or content is None:
            _logger.warning('Skipping news row %d: missing title or content', index)
            continue
        chunks.append(_clean(title) + '\n' + _clean(content) + '\n\n')
    return ''.join(chunks)


def _count_terms(text, stops):
    """Segment *text* with jieba and count terms that are not stop words."""
    return Counter(
        term
        for term in jieba.cut(text, cut_all=False)
        # Whitespace-only tokens (newlines and the spaces inserted by
        # _clean) are not words and must not take a slot in the cloud.
        if term not in stops and term.strip()
    )


def wordCloud(csv_url):
    """Download a news CSV and render a word cloud image from it.

    The CSV at *csv_url* is saved to ``wordCloudData/news.csv``; its
    ``Title`` and ``Content`` columns are cleaned, written to
    ``wordCloudData/news.txt``, segmented with jieba (dictionary
    ``wordCloudData/dict.txt``, stop words ``wordCloudData/stopWord_test.txt``)
    and drawn into the shape given by ``wordCloudData/car.jpg``. The result
    is saved to ``wordCloudData/news_Wordcloud.png``.

    Args:
        csv_url: URL of a UTF-8 CSV file with ``Title`` and ``Content`` columns.

    Returns:
        The saved word cloud, opened as a PIL image.

    Raises:
        requests.RequestException: the download failed, timed out, or the
            server answered with an HTTP error status.
        KeyError: the CSV has no ``Title`` or ``Content`` column.
        OSError: one of the files under ``wordCloudData/`` cannot be
            read or written.

    NOTE(review): when no term survives filtering, the wordcloud library is
    expected to raise ValueError from ``generate_from_frequencies`` -- confirm.
    """
    # Fail fast on HTTP errors instead of saving an error page as the CSV,
    # and never hang forever on an unresponsive server.
    response = requests.get(csv_url, timeout=30)
    response.raise_for_status()
    with open('wordCloudData/news.csv', 'wb') as f:
        f.write(response.content)

    news = _load_news('wordCloudData/news.csv')

    # Build the corpus in memory; news.txt is still written as before, but
    # there is no need to read it back.
    text = _build_corpus(news)
    with open('wordCloudData/news.txt', 'w', encoding='utf-8') as f:
        f.write(text)

    jieba.set_dictionary('wordCloudData/dict.txt')
    # Stop words, one per line; a set gives O(1) membership tests per token.
    with open('wordCloudData/stopWord_test.txt', 'r', encoding='utf-8-sig') as f:
        stops = set(f.read().split('\n'))

    diction = _count_terms(text, stops)

    font = 'wordCloudData/msyh.ttc'  # font used to draw the words
    # The mask image defines the shape of the cloud; close the file once the
    # pixels have been copied into the array.
    with Image.open("wordCloudData/car.jpg") as mask_image:
        mask = np.array(mask_image)

    # The library's default background is black; use white instead.
    wordcloud = WordCloud(background_color="white", mask=mask, font_path=font)
    wordcloud.generate_from_frequencies(frequencies=diction)

    # Draw the cloud on a pyplot figure.
    # NOTE(review): this figure is never shown or closed here, so repeated
    # calls accumulate open figures; kept because a caller may rely on the
    # current pyplot figure -- confirm, then close or remove.
    plt.figure(figsize=(6, 6))
    plt.imshow(wordcloud)
    plt.axis("off")

    wordcloud.to_file("wordCloudData/news_Wordcloud.png")  # save to disk
    return Image.open("wordCloudData/news_Wordcloud.png")