word_cloud / app.py
szk1ck's picture
デフォルトの入力を追加
0b270c7
from janome.tokenizer import Tokenizer
from wordcloud import WordCloud
import gradio as gr
from urllib.request import urlopen
from bs4 import BeautifulSoup
def generate_cloud(*args):
    """Render a word cloud image from the nouns found in a text.

    Args:
        *args: Exactly four positional values, in this order:
            text: The text to tokenize with Janome.
            width: Image width in pixels; ``None`` falls back to 1024.
            height: Image height in pixels; ``None`` falls back to 768.
            background_color: Background colour handed to ``WordCloud``.

    Returns:
        The rendered image as produced by ``WordCloud.to_array()``.

    Raises:
        ValueError: If ``args`` does not hold exactly four values, or if
            the text yields no nouns to draw.
    """
    text, width, height, background_color = args

    # Treat a missing size as "use the default" rather than failing in int().
    width = 1024 if width is None else width
    height = 768 if height is None else height

    # Keep only tokens tagged as nouns (名詞) and feed their base forms to the
    # cloud as a single space-separated string.
    tokens = Tokenizer().tokenize(text or "")
    nouns = " ".join(
        token.base_form
        for token in tokens
        if token.part_of_speech.startswith("名詞")
    )
    if not nouns.strip():
        # Fail early with a clear message instead of letting the renderer
        # complain about an empty word list.
        raise ValueError(
            "No nouns were found in the input text; cannot build a word cloud."
        )

    wc = WordCloud(
        width=int(width),
        height=int(height),
        # Raw string: "\w" in a normal literal is an invalid escape sequence.
        regexp=r"[\w']+",
        font_path="./ipaexm00401/ipaexm.ttf",
        background_color=background_color,
    )
    wc.generate(nouns)
    return wc.to_array()
def get_text_from_url(*args):
    """Download a web page and render a word cloud from its visible text.

    Args:
        *args: Exactly four positional values, in this order:
            url: Address of the page to fetch; must be http:// or https://.
            width: Image width, forwarded to ``generate_cloud``.
            height: Image height, forwarded to ``generate_cloud``.
            background_color: Background colour, forwarded to
                ``generate_cloud``.

    Returns:
        Whatever ``generate_cloud`` returns for the extracted text.

    Raises:
        ValueError: If ``args`` does not hold exactly four values, or if the
            URL scheme is not http/https.
        urllib.error.URLError: If the page cannot be fetched.
    """
    from urllib.parse import urlparse

    url, width, height, background_color = args
    url = str(url).strip()

    # The URL is typed by the user and urlopen also accepts schemes such as
    # file:// and ftp://, so restrict it to ordinary web addresses.
    scheme = urlparse(url).scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(
            f"Only http:// and https:// URLs are supported, got: {url!r}"
        )

    # Close the connection deterministically and do not wait forever on a
    # server that never answers.
    with urlopen(url, timeout=10) as response:
        html = response.read()

    # Extract the text of <body>; documents without one fall back to the
    # text of the whole parsed tree instead of raising AttributeError.
    soup = BeautifulSoup(html, "html.parser")
    container = soup.body if soup.body is not None else soup
    body_text = container.get_text()
    return generate_cloud(body_text, width, height, background_color)
# Tab that builds a word cloud from text typed directly into the UI.
# The input components are listed in the order generate_cloud unpacks them:
# text, width, height, background colour.
from_text = gr.Interface(
    title="☁️にほんご わーどくらうど☁",
    fn=generate_cloud,
    inputs=[
        gr.Textbox(value="ここに日本語のテキストを入力します.", label="入力テキスト"),
        gr.Number(label="横幅(デフォルト値:1024)", value=1024),
        gr.Number(label="高さ(デフォルト値:768)", value=768),
        gr.Radio(label="背景色", value="black", choices=["black", "white"]),
    ],
    outputs=gr.Image(type="pil"),
)
# Tab that builds a word cloud from the text of a web page.
# The input components are listed in the order get_text_from_url unpacks
# them: url, width, height, background colour.
from_url = gr.Interface(
    title="☁️にほんご わーどくらうど☁",
    fn=get_text_from_url,
    inputs=[
        gr.Textbox(value="https://gradio.app/", label="URL"),
        gr.Number(label="横幅(デフォルト値:1024)", value=1024),
        gr.Number(label="高さ(デフォルト値:768)", value=768),
        gr.Radio(label="背景色", value="black", choices=["black", "white"]),
    ],
    outputs=gr.Image(type="pil"),
)
# Put both interfaces behind tabs (the second list supplies the tab names,
# in the same order) and start the app.
demo = gr.TabbedInterface(
    [from_text, from_url],
    ["from_text", "from_url"],
)
demo.launch()