"""Gradio demo that renders Japanese word clouds from text or from a web page."""

from urllib.parse import urlparse
from urllib.request import urlopen

from bs4 import BeautifulSoup
import gradio as gr
from janome.tokenizer import Tokenizer
from wordcloud import WordCloud

# Fallback canvas size used when the corresponding Number input is empty.
DEFAULT_WIDTH = 1024
DEFAULT_HEIGHT = 768

# Font handed to WordCloud; the path is relative to the working directory.
FONT_PATH = "./ipaexm00401/ipaexm.ttf"

# Prefix of janome's part-of-speech string that marks a noun.
NOUN_TAG = "名詞"

# Only these URL schemes may be fetched on behalf of a UI user.
ALLOWED_SCHEMES = ("http", "https")

TITLE = "☁️にほんご わーどくらうど☁"


def generate_cloud(text, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT,
                   background_color="black"):
    """Build a word cloud from the nouns found in *text*.

    Args:
        text: Text to tokenize with janome.
        width: Image width; ``None`` falls back to ``DEFAULT_WIDTH``.
        height: Image height; ``None`` falls back to ``DEFAULT_HEIGHT``.
        background_color: Background colour passed through to ``WordCloud``.

    Returns:
        The rendered image as returned by ``WordCloud.to_array()``.

    Raises:
        ValueError: If no noun is found in *text*.
    """
    # gr.Number yields None when the field is cleared, so restore the defaults.
    width = DEFAULT_WIDTH if width is None else width
    height = DEFAULT_HEIGHT if height is None else height

    tokens = Tokenizer().tokenize(text)
    nouns = [
        token.base_form
        for token in tokens
        if token.part_of_speech.startswith(NOUN_TAG)
    ]
    if not nouns:
        # WordCloud.generate() rejects empty input with a generic message;
        # fail early with one that names the actual cause.
        raise ValueError("No nouns were found in the input text.")

    cloud = WordCloud(
        width=int(width),
        height=int(height),
        regexp=r"[\w']+",  # raw string: "\w" is not a valid str escape
        font_path=FONT_PATH,
        background_color=background_color,
    )
    cloud.generate(" ".join(nouns))
    return cloud.to_array()


def get_text_from_url(url, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT,
                      background_color="black"):
    """Fetch *url* and build a word cloud from the text of the page.

    Args:
        url: An ``http`` or ``https`` URL of the page to read.
        width: Image width, forwarded to ``generate_cloud``.
        height: Image height, forwarded to ``generate_cloud``.
        background_color: Background colour, forwarded to ``generate_cloud``.

    Returns:
        The rendered image as returned by ``generate_cloud``.

    Raises:
        ValueError: If *url* is empty or does not use http/https.
    """
    url = (url or "").strip()
    # The URL comes straight from the web UI; urlopen would also accept
    # schemes such as file:, so restrict it to plain web addresses.
    if urlparse(url).scheme.lower() not in ALLOWED_SCHEMES:
        raise ValueError("Only http:// and https:// URLs are supported.")

    # Fetch the HTML and parse it while the response is still open, then
    # let the context manager release the connection.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, "html.parser")

    # Use the <body> text when present; documents without a <body> would
    # otherwise fail on None, so fall back to the whole document.
    root = soup.body if soup.body is not None else soup
    return generate_cloud(root.get_text(), width, height, background_color)


def _size_and_color_inputs():
    """Return fresh width/height/background components for one interface."""
    return [
        gr.Number(value=1024, label="横幅(デフォルト値:1024)"),
        gr.Number(value=768, label="高さ(デフォルト値:768)"),
        gr.Radio(choices=["black", "white"], value="black", label="背景色"),
    ]


# Tab 1: word cloud from text typed into the UI.
from_text = gr.Interface(
    fn=generate_cloud,
    inputs=[
        gr.Textbox(label="入力テキスト", value="ここに日本語のテキストを入力します."),
        *_size_and_color_inputs(),
    ],
    outputs=gr.Image(type="pil"),
    title=TITLE,
)

# Tab 2: word cloud from the text of a fetched web page.
from_url = gr.Interface(
    fn=get_text_from_url,
    inputs=[
        gr.Textbox(label="URL", value="https://gradio.app/"),
        *_size_and_color_inputs(),
    ],
    outputs=gr.Image(type="pil"),
    title=TITLE,
)

demo = gr.TabbedInterface([from_text, from_url], ["from_text", "from_url"])

demo.launch()