# word_cloud / app.py
# Last commit 0b270c7 by szk1ck: "Add default inputs"
# File size: 2.2 kB
from janome.tokenizer import Tokenizer
from wordcloud import WordCloud
import gradio as gr
from urllib.request import urlopen
from bs4 import BeautifulSoup
def generate_cloud(*args):
    """Render a word cloud from the nouns found in Japanese text.

    Args:
        *args: Exactly four positional values, in this order:
            text: The text to analyse.
            width: Image width in pixels; ``None`` selects 1024.
            height: Image height in pixels; ``None`` selects 768.
            background_color: Background colour handed to ``WordCloud``.

    Returns:
        The rendered image as produced by ``WordCloud.to_array()``.

    Raises:
        ValueError: If the number of arguments is not four, if a dimension
            is not positive, or if the text is blank / contains no nouns.
    """
    text, width, height, background_color = args

    # Fall back to the default canvas size when a dimension is missing.
    width = int(width) if width is not None else 1024
    height = int(height) if height is not None else 768
    if width <= 0 or height <= 0:
        raise ValueError("width and height must be positive integers")

    # Reject blank input before paying for the tokenizer's dictionary load.
    if not text or not text.strip():
        raise ValueError("input text is empty; nothing to build a word cloud from")

    # Keep only nouns (part of speech starting with 名詞), in base form.
    tokens = Tokenizer().tokenize(text)
    nouns = " ".join(
        token.base_form
        for token in tokens
        if token.part_of_speech.startswith('名詞')
    )
    if not nouns.strip():
        raise ValueError("no nouns were found in the input text")

    wc = WordCloud(
        width=width,
        height=height,
        # Raw string: "\w" is not a valid escape in a plain string literal.
        regexp=r"[\w']+",
        font_path="./ipaexm00401/ipaexm.ttf",
        background_color=background_color,
    )
    wc.generate(nouns)
    return wc.to_array()
def get_text_from_url(*args):
    """Download a web page and render a word cloud from its text.

    Args:
        *args: Exactly four positional values, in this order:
            url: Address of the page to fetch; must be http or https.
            width: Image width, forwarded to ``generate_cloud``.
            height: Image height, forwarded to ``generate_cloud``.
            background_color: Background colour, forwarded to
                ``generate_cloud``.

    Returns:
        Whatever ``generate_cloud`` returns for the extracted page text.

    Raises:
        ValueError: If the number of arguments is not four, or if the URL
            is not an http(s) URL with a host.
    """
    from urllib.parse import urlparse

    url, width, height, background_color = args

    # The URL comes straight from the user. urlopen also understands
    # schemes such as file://, which would expose local files, so only
    # plain web addresses are accepted.
    url = (url or "").strip()
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError("only http:// and https:// URLs are supported")

    # Fetch the HTML. The context manager closes the connection, and the
    # timeout keeps a stalled server from blocking the request forever.
    with urlopen(url, timeout=10) as response:
        soup = BeautifulSoup(response, "html.parser")

    # Extract the text of <body>; fall back to the whole document when the
    # page has no <body> element (soup.body is None in that case).
    root = soup.body if soup.body is not None else soup
    body_text = root.get_text()
    return generate_cloud(body_text, width, height, background_color)
def _render_option_inputs():
    """Create a fresh width / height / background-colour input group."""
    return [
        gr.Number(value=1024, label="横幅(デフォルト値:1024)"),
        gr.Number(value=768, label="高さ(デフォルト値:768)"),
        gr.Radio(choices=["black", "white"], value="black", label="背景色"),
    ]


def _build_tab(handler, source_input):
    """Wrap *handler* in an Interface taking *source_input* plus the options."""
    return gr.Interface(
        fn=handler,
        inputs=[source_input, *_render_option_inputs()],
        outputs=gr.Image(type="pil"),
        title="☁️にほんご わーどくらうど☁",
    )


# Tab 1: word cloud from text typed directly into the page.
from_text = _build_tab(
    generate_cloud,
    gr.Textbox(label="入力テキスト", value="ここに日本語のテキストを入力します."),
)

# Tab 2: word cloud from the body text of a web page.
from_url = _build_tab(
    get_text_from_url,
    gr.Textbox(label="URL", value="https://gradio.app/"),
)

# Combine both interfaces into one tabbed app and start serving it.
demo = gr.TabbedInterface([from_text, from_url], ["from_text", "from_url"])
demo.launch()