File size: 2,200 Bytes
5a9e2db
 
 
 
4543aa3
 
 
5127fc0
4543aa3
6f53875
 
 
 
 
 
 
5a9e2db
 
 
 
728d2ce
 
 
 
6f53875
 
728d2ce
5a9e2db
 
 
5127fc0
5a9e2db
 
6f53875
 
4543aa3
 
 
 
 
 
 
 
6f53875
4543aa3
 
 
6f53875
728d2ce
0b270c7
4543aa3
6f53875
 
4543aa3
 
 
 
 
 
 
6f53875
4543aa3
0b270c7
4543aa3
6f53875
 
728d2ce
5a9e2db
728d2ce
4543aa3
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from janome.tokenizer import Tokenizer
from wordcloud import WordCloud
import gradio as gr

from urllib.request import urlopen
from bs4 import BeautifulSoup



def generate_cloud(*args):
    """Render a word cloud image from the nouns found in a text.

    Args:
        *args: Exactly four positional values, in this order:
            text: Text to tokenize with janome's ``Tokenizer``.
            width: Image width; ``None`` falls back to 1024.
            height: Image height; ``None`` falls back to 768.
            background_color: Background colour handed to ``WordCloud``.

    Returns:
        The rendered cloud, as returned by ``WordCloud.to_array()``.

    Raises:
        ValueError: If ``args`` does not contain exactly four values.
    """
    text, width, height, background_color = args

    # Fall back to the default canvas size when a dimension is missing.
    if width is None:
        width = 1024
    if height is None:
        height = 768

    tokens = Tokenizer().tokenize(text)

    wc = WordCloud(
        width=int(width),
        height=int(height),
        # Raw string: "\w" inside a plain literal is an invalid escape
        # sequence and triggers a SyntaxWarning on recent Python versions.
        regexp=r"[\w']+",
        font_path="./ipaexm00401/ipaexm.ttf",
        background_color=background_color,
    )

    # Keep the base form of every token whose part of speech starts with
    # '名詞' (noun), joined by spaces so WordCloud can split them again.
    nouns = " ".join(
        token.base_form
        for token in tokens
        if token.part_of_speech.startswith('名詞')
    )
    wc.generate(nouns)
    return wc.to_array()


def get_text_from_url(*args):
    """Download a web page and render a word cloud from its text.

    Args:
        *args: Exactly four positional values, in this order:
            url: Address of the page to fetch; must be an http(s) URL.
            width: Forwarded unchanged to ``generate_cloud``.
            height: Forwarded unchanged to ``generate_cloud``.
            background_color: Forwarded unchanged to ``generate_cloud``.

    Returns:
        Whatever ``generate_cloud`` returns for the extracted page text.

    Raises:
        ValueError: If ``args`` does not contain exactly four values, or if
            the URL does not use the http or https scheme.
    """
    url, width, height, background_color = args

    # The URL is user-supplied. urlopen also understands schemes such as
    # file:// and ftp://, so only plain web addresses are let through.
    url = str(url).strip()
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError(f"Only http:// and https:// URLs are supported: {url!r}")

    # Fetch the HTML and parse it while the response is open; the context
    # manager closes the connection afterwards.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, "html.parser")

    # Extract the text of <body>; documents without a <body> element fall
    # back to the text of the whole document instead of failing on None.
    root = soup.body if soup.body is not None else soup
    body_text = root.get_text()

    return generate_cloud(body_text, width, height, background_color)


# Title displayed on both interfaces.
_TITLE = "☁️にほんご わーどくらうど☁"


def _size_and_color_inputs():
    """Create new width, height and background-colour input components."""
    width_input = gr.Number(value=1024, label="横幅(デフォルト値:1024)")
    height_input = gr.Number(value=768, label="高さ(デフォルト値:768)")
    color_input = gr.Radio(choices=["black", "white"], value="black", label="背景色")
    return [width_input, height_input, color_input]


# Interface that builds the cloud from text typed into a textbox.
from_text = gr.Interface(
    fn=generate_cloud,
    inputs=[
        gr.Textbox(label="入力テキスト", value="ここに日本語のテキストを入力します."),
        *_size_and_color_inputs(),
    ],
    outputs=gr.Image(type="pil"),
    title=_TITLE,
)

# Interface that builds the cloud from the text of a page given by URL.
from_url = gr.Interface(
    fn=get_text_from_url,
    inputs=[
        gr.Textbox(label="URL", value="https://gradio.app/"),
        *_size_and_color_inputs(),
    ],
    outputs=gr.Image(type="pil"),
    title=_TITLE,
)

# Expose both interfaces as tabs, each named after its variable.
_tabs = {"from_text": from_text, "from_url": from_url}
demo = gr.TabbedInterface(list(_tabs.values()), list(_tabs.keys()))

demo.launch()