from janome.tokenizer import Tokenizer
from wordcloud import WordCloud
import gradio as gr

from urllib.request import urlopen
from bs4 import BeautifulSoup


def generate_cloud(*args):
    """Render a word cloud image from the nouns found in Japanese text.

    Args:
        *args: Exactly four positional values, in this order:
            text: The text to analyse.
            width: Image width in pixels; ``None`` falls back to 1024.
            height: Image height in pixels; ``None`` falls back to 768.
            background_color: Background color handed to ``WordCloud``.

    Returns:
        The rendered cloud as produced by ``WordCloud.to_array()``.

    Raises:
        ValueError: If the argument count is not four, if ``text`` is empty
            or whitespace-only, or if no noun can be extracted from it.
    """
    text, width, height, background_color = args

    # Fail early with a clear message instead of an obscure error from the
    # tokenizer or the word cloud library; this also avoids building the
    # tokenizer when there is nothing to analyse.
    if not text or not text.strip():
        raise ValueError("No text to build a word cloud from.")

    # Default settings: callers may pass None for the size
    # (presumably when the numeric input is left empty -- confirm).
    width = 1024 if width is None else width
    height = 768 if height is None else height

    # Keep only nouns (part of speech starting with "名詞"), in base form.
    tokens = Tokenizer().tokenize(text)
    nouns = [
        token.base_form
        for token in tokens
        if token.part_of_speech.startswith('名詞')
    ]
    if not nouns:
        raise ValueError("No nouns were found in the text.")

    wc = WordCloud(
        width=int(width),
        height=int(height),
        # Raw string: the backslash must reach the regex engine untouched, and
        # a non-raw "\w" is an invalid string escape that Python warns about.
        regexp=r"[\w']+",
        font_path="./ipaexm00401/ipaexm.ttf",
        background_color=background_color,
    )

    # WordCloud expects a single string, so join the nouns with spaces.
    wc.generate(" ".join(nouns))
    return wc.to_array()


def get_text_from_url(*args):
    """Download a web page and render a word cloud from its text.

    Args:
        *args: Exactly four positional values, in this order:
            url: Address of the page to fetch; must use http or https.
            width: Image width, forwarded to ``generate_cloud``.
            height: Image height, forwarded to ``generate_cloud``.
            background_color: Background color, forwarded to
                ``generate_cloud``.

    Returns:
        Whatever ``generate_cloud`` returns for the extracted page text.

    Raises:
        ValueError: If the argument count is not four or the URL scheme is
            not http/https.
        urllib.error.URLError: If the page cannot be fetched.
    """
    from urllib.parse import urlparse

    url, width, height, background_color = args

    # The URL is user-supplied. urlopen also accepts schemes such as
    # file://, which would expose local files, so allow only web URLs.
    # NOTE(review): this does not block requests to internal hosts (SSRF);
    # add host filtering if the app is exposed publicly.
    scheme = urlparse(url or "").scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError("Only http:// and https:// URLs are supported.")

    # Fetch the HTML from the URL; the context manager closes the connection
    # and the timeout keeps an unresponsive server from hanging the request.
    with urlopen(url, timeout=10) as response:
        soup = BeautifulSoup(response, "html.parser")

    # Extract the text of <body>; documents without a <body> element fall
    # back to the text of the whole document instead of failing on None.
    container = soup.body if soup.body is not None else soup
    body_text = container.get_text()

    return generate_cloud(body_text, width, height, background_color)


# Tab 1: build the word cloud from text typed directly into a textbox.
# The input order matches the order in which generate_cloud unpacks its
# arguments: text, width, height, background color.
from_text = gr.Interface(
    fn=generate_cloud,
    inputs=[
        gr.Textbox(value="ここに日本語のテキストを入力します．", label="入力テキスト"),
        gr.Number(label="横幅（デフォルト値：1024）", value=1024),
        gr.Number(label="高さ（デフォルト値：768）", value=768),
        gr.Radio(label="背景色", choices=["black", "white"], value="black"),
    ],
    outputs=gr.Image(type="pil"),
    title="☁️にほんご　わーどくらうど☁",
)


# Tab 2: build the word cloud from the text of a web page.
# The input order matches the order in which get_text_from_url unpacks its
# arguments: URL, width, height, background color.
from_url = gr.Interface(
    fn=get_text_from_url,
    inputs=[
        gr.Textbox(value="https://gradio.app/", label="URL"),
        gr.Number(label="横幅（デフォルト値：1024）", value=1024),
        gr.Number(label="高さ（デフォルト値：768）", value=768),
        gr.Radio(label="背景色", choices=["black", "white"], value="black"),
    ],
    outputs=gr.Image(type="pil"),
    title="☁️にほんご　わーどくらうど☁",
)


# Combine both interfaces into a single tabbed app; each tab name lines up
# with the interface at the same position in the list.
demo = gr.TabbedInterface(
    [from_text, from_url],
    tab_names=["from_text", "from_url"],
)

# Launch the app when this file is executed.
demo.launch()