kawayui committed on
Commit
453a744
1 Parent(s): 17c7c67

add application and requirements

Browse files
Files changed (2) hide show
  1. app.py +75 -0
  2. requirements.txt +190 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from fugashi import Tagger
5
+ import re
6
+ import random
7
+ import matplotlib.pyplot as plt
8
+ import japanize_matplotlib
9
+ import seaborn as sns
10
+ from gensim.corpora.dictionary import Dictionary
11
+ from gensim import models
12
+ from gensim.models.word2vec import Word2Vec
13
+ from gensim import similarities
14
+
15
# Select the IPAexGothic font for seaborn plots
# (presumably so Japanese labels render correctly -- TODO confirm).
sns.set(font="IPAexGothic")
16
+
17
+ # モデルなどの読み込み
18
# Regex used to strip symbols: it matches every character that is NOT
# hiragana/katakana, a CJK ideograph, whitespace, or an ASCII letter.
# Code points above U+FFFF must use the 8-digit \U escape. The previous
# "\u20000-\u2ffff" was parsed as U+2000 followed by the range "0"-U+2FFF,
# which accidentally preserved digits and many symbols and never covered the
# supplementary ideographs in U+20000-U+2FFFF.
pattern = r"[^\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\U00020000-\U0002ffff\sa-zA-Z]"
19
# Load every artifact the app relies on from the working directory.
# NOTE(review): Streamlit is understood to re-execute the script on each
# interaction, so these loads may repeat -- consider caching; TODO confirm.

# Raw corpus.
df = pd.read_csv("./raw_corpus.csv")

# Gensim dictionary.
dictionary = Dictionary.load("./livedoor.dict")

# Trained LDA topic model.
lda = models.LdaModel.load("./lda.model")

# Similarity index over the corpus built with the topic model.
index = similarities.MatrixSimilarity.load("./lda.index")

# Word distribution of the topic model; the original note gives its shape as (K, V).
word_dist = lda.get_topics()

# Shiroyagi word2vec model:
# https://github.com/shiroyagicorp/japanese-word2vec-model-builder
w2v = Word2Vec.load("./word2vec.gensim.model")
25
+
26
# Turn the topic-model information (the word distribution of each topic)
# into a DataFrame with one row per (topic, word) pair.
num_words = 30

# Each entry is (topic id, [(word, weight), ...]) because formatted=False.
topic_terms = lda.show_topics(num_topics=5, num_words=num_words, formatted=False)

topic_list = [topic_id for topic_id, terms in topic_terms for _ in terms]
word_list = [word for _, terms in topic_terms for word, _ in terms]
# Weights are stored as percentages rounded to two decimals.
weight_list = [
    round(float(weight) * 100, 2)
    for _, terms in topic_terms
    for _, weight in terms
]

topic_df = pd.DataFrame(
    {
        "topic": topic_list,
        "word": word_list,
        "weight": weight_list,
    }
)
41
+
42
+
43
# Streamlit UI: word2vec analogy ("atom - negative + positive").
st.sidebar.markdown("Set Parameter")

# NOTE: an earlier draft offered preset words through sidebar radio buttons
# and used them as text-input defaults; free-text inputs are used instead.

st.header("word2vecによるアナロジー")
st.subheader("単語の演算")
st.caption("演算対象の単語")
col1, col2, col3 = st.columns(3)

with col1:
    # Base word of the analogy.
    atom = st.text_input("元になる単語")

with col2:
    # Word whose vector is subtracted.
    negative = st.text_input("ー引く単語")

with col3:
    # Word whose vector is added.
    positive = st.text_input("+足す単語")

button = st.button("演算する")

if button:
    # Surrounding whitespace can never be part of a vocabulary key.
    atom, negative, positive = (w.strip() for w in (atom, negative, positive))
    st.text(f"{atom} - {negative} + {positive}")

    terms = [atom, negative, positive]
    if not all(terms):
        # An empty field would otherwise reach most_similar and raise KeyError.
        st.warning("3つの単語をすべて入力してください。")
    else:
        # Report out-of-vocabulary words instead of crashing with a traceback.
        unknown = [term for term in terms if term not in w2v.wv.key_to_index]
        if unknown:
            st.error(f"語彙に存在しない単語があります: {', '.join(unknown)}")
        else:
            # Pass `negative` as an explicit list so the call does not rely on
            # gensim wrapping a bare string.
            x = w2v.wv.most_similar(positive=[atom, positive], negative=[negative])
            st.dataframe(x)
requirements.txt ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.15.0
2
+ aiohttp==3.8.3
3
+ aiosignal==1.3.1
4
+ altair==4.2.0
5
+ anyio==3.6.2
6
+ appnope==0.1.3
7
+ argon2-cffi==21.3.0
8
+ argon2-cffi-bindings==21.2.0
9
+ arrow==1.2.3
10
+ asttokens==2.2.1
11
+ async-timeout==4.0.2
12
+ attrs==22.2.0
13
+ backcall==0.2.0
14
+ beautifulsoup4==4.11.1
15
+ bleach==5.0.1
16
+ blinker==1.5
17
+ cachetools==5.3.0
18
+ certifi==2022.12.7
19
+ cffi==1.15.1
20
+ charset-normalizer==2.1.1
21
+ click==8.1.3
22
+ comm==0.1.2
23
+ contourpy==1.0.6
24
+ cycler==0.11.0
25
+ datasets==2.8.0
26
+ debugpy==1.6.4
27
+ decorator==5.1.1
28
+ defusedxml==0.7.1
29
+ dill==0.3.6
30
+ entrypoints==0.4
31
+ evaluate==0.4.0
32
+ executing==1.2.0
33
+ fastapi==0.89.0
34
+ fastjsonschema==2.16.2
35
+ ffmpy==0.3.0
36
+ filelock==3.9.0
37
+ fonttools==4.38.0
38
+ fqdn==1.5.1
39
+ frozenlist==1.3.3
40
+ fsspec==2022.11.0
41
+ fst-pso==1.8.1
42
+ fugashi==1.2.1
43
+ FuzzyTM==2.0.5
44
+ gensim==4.3.0
45
+ gitdb==4.0.10
46
+ GitPython==3.1.30
47
+ gradio==3.16.1
48
+ h11==0.14.0
49
+ httpcore==0.16.3
50
+ httpx==0.23.3
51
+ huggingface-hub==0.11.1
52
+ idna==3.4
53
+ importlib-metadata==5.2.0
54
+ ipadic==1.0.0
55
+ ipykernel==6.19.4
56
+ ipython==8.7.0
57
+ ipython-genutils==0.2.0
58
+ ipywidgets==8.0.4
59
+ isoduration==20.11.0
60
+ japanize-matplotlib==1.1.3
61
+ jedi==0.18.2
62
+ Jinja2==3.1.2
63
+ joblib==1.2.0
64
+ jsonpointer==2.3
65
+ jsonschema==4.17.3
66
+ jupyter==1.0.0
67
+ jupyter-console==6.4.4
68
+ jupyter-events==0.5.0
69
+ jupyter_client==7.4.8
70
+ jupyter_core==5.1.1
71
+ jupyter_server==2.0.6
72
+ jupyter_server_terminals==0.4.3
73
+ jupyterlab-pygments==0.2.2
74
+ jupyterlab-widgets==3.0.5
75
+ kiwisolver==1.4.4
76
+ linkify-it-py==1.0.3
77
+ markdown-it-py==2.1.0
78
+ MarkupSafe==2.1.1
79
+ matplotlib==3.6.2
80
+ matplotlib-inline==0.1.6
81
+ mdit-py-plugins==0.3.3
82
+ mdurl==0.1.2
83
+ miniful==0.0.6
84
+ mistune==2.0.4
85
+ multidict==6.0.4
86
+ multiprocess==0.70.14
87
+ nbclassic==0.4.8
88
+ nbclient==0.7.2
89
+ nbconvert==7.2.7
90
+ nbformat==5.7.1
91
+ nest-asyncio==1.5.6
92
+ notebook==6.5.2
93
+ notebook_shim==0.2.2
94
+ numpy==1.24.1
95
+ orjson==3.8.4
96
+ packaging==22.0
97
+ pandas==1.5.2
98
+ pandocfilters==1.5.0
99
+ parso==0.8.3
100
+ pexpect==4.8.0
101
+ pickleshare==0.7.5
102
+ Pillow==9.3.0
103
+ plac==1.3.5
104
+ platformdirs==2.6.2
105
+ polars==0.15.17
106
+ portalocker==2.6.0
107
+ prometheus-client==0.15.0
108
+ prompt-toolkit==3.0.36
109
+ protobuf==3.20.1
110
+ psutil==5.9.4
111
+ ptyprocess==0.7.0
112
+ pure-eval==0.2.2
113
+ pyarrow==10.0.1
114
+ pycparser==2.21
115
+ pycryptodome==3.16.0
116
+ pydantic==1.10.4
117
+ pydeck==0.8.0
118
+ pydub==0.25.1
119
+ pyFUME==0.2.25
120
+ Pygments==2.13.0
121
+ Pympler==1.0.1
122
+ pyparsing==3.0.9
123
+ pyrsistent==0.19.3
124
+ python-dateutil==2.8.2
125
+ python-json-logger==2.0.4
126
+ python-multipart==0.0.5
127
+ pytz==2022.7
128
+ pytz-deprecation-shim==0.1.0.post0
129
+ PyYAML==6.0
130
+ pyzmq==24.0.1
131
+ qtconsole==5.4.0
132
+ QtPy==2.3.0
133
+ regex==2022.10.31
134
+ requests==2.28.1
135
+ responses==0.18.0
136
+ rfc3339-validator==0.1.4
137
+ rfc3986==1.5.0
138
+ rfc3986-validator==0.1.1
139
+ rich==13.2.0
140
+ scikit-learn==1.2.0
141
+ scipy==1.10.0
142
+ seaborn==0.12.2
143
+ semver==2.13.0
144
+ Send2Trash==1.8.0
145
+ sentencepiece==0.1.97
146
+ simpful==2.9.0
147
+ six==1.16.0
148
+ smart-open==6.3.0
149
+ smmap==5.0.0
150
+ sniffio==1.3.0
151
+ soupsieve==2.3.2.post1
152
+ stack-data==0.6.2
153
+ starlette==0.22.0
154
+ streamlit==1.17.0
155
+ terminado==0.17.1
156
+ threadpoolctl==3.1.0
157
+ tinycss2==1.2.1
158
+ tokenizers==0.13.2
159
+ toml==0.10.2
160
+ toolz==0.12.0
161
+ torch==1.13.1
162
+ torchaudio==0.13.1
163
+ torchdata==0.5.1
164
+ torchtext==0.14.1
165
+ torchvision==0.14.1
166
+ tornado==6.2
167
+ tqdm==4.64.1
168
+ traitlets==5.8.0
169
+ transformers==4.25.1
170
+ typing_extensions==4.4.0
171
+ tzdata==2022.7
172
+ tzlocal==4.2
173
+ uc-micro-py==1.0.1
174
+ unidic==1.1.0
175
+ unidic-lite==1.0.8
176
+ uri-template==1.2.0
177
+ urllib3==1.26.13
178
+ uvicorn==0.20.0
179
+ validators==0.20.0
180
+ wasabi==0.10.1
181
+ wcwidth==0.2.5
182
+ webcolors==1.12
183
+ webencodings==0.5.1
184
+ websocket-client==1.4.2
185
+ websockets==10.4
186
+ widgetsnbextension==4.0.5
187
+ wordcloud==1.8.2.2
188
+ xxhash==3.2.0
189
+ yarl==1.8.2
190
+ zipp==3.11.0