ssboost committed on
Commit
575105b
·
verified ·
1 Parent(s): 7dad958

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -30
app.py CHANGED
@@ -2,6 +2,11 @@ import gradio as gr
2
  import pandas as pd
3
  import tempfile
4
  import re
 
 
 
 
 
5
 
6
  def preprocess_text(text: str) -> str:
7
  """
@@ -22,6 +27,9 @@ def expand_columns_if_needed(df, needed_index: int):
22
  # 맨 끝에 빈 열 추가
23
  df[df.shape[1]] = None
24
 
 
 
 
25
 
26
  def count_keywords(main_text, excel_file, direct_input):
27
  """
@@ -129,40 +137,119 @@ def count_keywords(main_text, excel_file, direct_input):
129
 
130
  return (md_table, tmp_path)
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  ########################
134
- # Gradio 인터페이스 #
135
  ########################
136
- with gr.Blocks() as demo:
137
- gr.Markdown("## ๋ณธ๋ฌธ & ํ‚ค์›Œ๋“œ ๋ถ„์„ - (A5~A10000, N5~N10000)")
138
 
139
- with gr.Row():
140
- with gr.Column():
141
- main_textbox = gr.Textbox(
142
- label="๋ณธ๋ฌธ ํ…์ŠคํŠธ",
143
- lines=16,
144
- placeholder="์—ฌ๊ธฐ์— ๊ธด ๋ณธ๋ฌธ์„ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”. ํ•œ๊ธ€๋งŒ ๋‚จ๊ธฐ๊ณ  ๋‚˜๋จธ์ง€๋Š” ์ œ๊ฑฐ๋ฉ๋‹ˆ๋‹ค."
145
- )
146
- with gr.Column():
147
- keyword_input = gr.Textbox(
148
- label="(์„ ํƒ) ์ง์ ‘ ์ž…๋ ฅ ํ‚ค์›Œ๋“œ - ์—”ํ„ฐ๋กœ ๊ตฌ๋ถ„",
149
- lines=6,
150
- placeholder="์˜ˆ)\n์ดˆ์ŒํŒŒ๊ฐ€์Šต๊ธฐ\n๊ฐ€์Šต๊ธฐ\n..."
151
- )
152
- excel_input = gr.File(
153
- label="(์„ ํƒ) ์—‘์…€ ์—…๋กœ๋“œ (A5~A10000=ํ‚ค์›Œ๋“œ, N5~N10000=์นด์šดํŠธ)",
154
- file_types=[".xlsx"]
155
- )
156
- run_button = gr.Button("์นด์šดํŠธํ•˜๊ธฐ")
157
-
158
- output_md = gr.Markdown(label="๊ฒฐ๊ณผ ํ‘œ")
159
- output_file = gr.File(label="๊ฒฐ๊ณผ ์—‘์…€ ๋‹ค์šด๋กœ๋“œ")
160
-
161
- run_button.click(
162
- fn=count_keywords,
163
- inputs=[main_textbox, excel_input, keyword_input],
164
- outputs=[output_md, output_file]
165
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  if __name__ == "__main__":
168
  demo.launch()
 
2
  import pandas as pd
3
  import tempfile
4
  import re
5
+ from mecab import MeCab
6
+
7
+ ##############################
8
+ # 1) 공통 함수들
9
+ ##############################
10
 
11
  def preprocess_text(text: str) -> str:
12
  """
 
27
  # 맨 끝에 빈 열 추가
28
  df[df.shape[1]] = None
29
 
30
+ ##############################
31
+ # 2) 키워드 카운트 함수
32
+ ##############################
33
 
34
  def count_keywords(main_text, excel_file, direct_input):
35
  """
 
137
 
138
  return (md_table, tmp_path)
139
 
140
+ ##############################
141
+ # 3) 형태소 분석 기반 키워드 카운트 함수
142
+ ##############################
143
+
144
def morph_analysis_and_count(text: str):
    """Extract nouns from *text* with MeCab and count how often each occurs.

    Steps:
      1. Clean the input with ``preprocess_text`` (per the original note it
         keeps only Hangul -- confirm against that helper).
      2. POS-tag the cleaned text with MeCab (python-mecab-ko).
      3. Keep only tokens tagged NNG, NNP, NP or NNB.
      4. Count every distinct noun with ``str.count`` on the cleaned text,
         i.e. as a substring, so a noun embedded in a longer word also counts.

    Args:
        text: Raw body text entered by the user.

    Returns:
        A ``(markdown, csv_path)`` tuple. ``markdown`` is the frequency table
        rendered as Markdown, or a user-facing message when no noun was found
        or ``tabulate`` is not installed. ``csv_path`` is the path of a
        temporary CSV holding the same table, or ``None`` in those two
        message-only cases.
    """
    # 1) Pre-processing.
    cleaned = preprocess_text(text)

    # 2) MeCab analysis.
    tagger = MeCab()
    parsed = tagger.pos(cleaned)  # e.g. [('초음파가습기', 'NNG'), ('효과', 'NNG'), ...]

    # 3) Keep nouns only.
    noun_tags = {'NNG', 'NNP', 'NP', 'NNB'}  # POS tags of interest
    nouns = [word for word, pos in parsed if pos in noun_tags]

    # De-duplicate while keeping first-appearance order. A plain set() would
    # give a hash-dependent order that changes between runs and would leak
    # into the ordering of equal-frequency rows below.
    unique_nouns = list(dict.fromkeys(nouns))

    # 4) Substring-count each noun in the cleaned text, keeping counts >= 1.
    freq_dict = {noun: cleaned.count(noun) for noun in unique_nouns}
    filtered_freq = {k: v for k, v in freq_dict.items() if v > 0}

    if not filtered_freq:
        return "추출된 명사가 없습니다.", None

    # Build the table, most frequent first. mergesort is stable, so nouns
    # with equal counts stay in first-appearance order.
    freq_df = pd.DataFrame(list(filtered_freq.items()), columns=['명사', '빈도'])
    freq_df = freq_df.sort_values(
        by='빈도', ascending=False, kind='mergesort'
    ).reset_index(drop=True)

    # DataFrame.to_markdown needs the optional 'tabulate' package.
    try:
        md_table = freq_df.to_markdown(index=False)
    except ImportError:
        md_table = "Markdown 변환을 위해 'tabulate' 라이브러리가 필요합니다."
        return md_table, None

    # Reserve a temp path, close the handle, then write: re-opening a
    # still-open NamedTemporaryFile by name is platform-dependent.
    # delete=False keeps the file so it can be served as a download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        tmp_path = tmp.name
    # utf-8-sig adds a BOM, which spreadsheet apps use to detect UTF-8.
    freq_df.to_csv(tmp_path, index=False, encoding='utf-8-sig')

    return md_table, tmp_path
194
 
195
  ########################
196
+ # 4) Gradio 인터페이스 #
197
  ########################
 
 
198
 
199
# Two-tab Gradio UI:
#   tab 1 counts user-supplied keywords (typed in and/or from an .xlsx file),
#   tab 2 extracts nouns by morphological analysis and counts them.
with gr.Blocks() as demo:
    with gr.Tab("키워드 카운트"):
        with gr.Row():
            # Left: input area.
            with gr.Column():
                main_textbox = gr.Textbox(
                    label="본문 텍스트",
                    lines=16,
                    placeholder="여기에 긴 본문을 붙여넣으세요."
                )
                keyword_input = gr.Textbox(
                    label="(선택) 직접 입력 키워드 - 엔터로 구분",
                    lines=6,
                    placeholder="예)\n초음파가습기\n가습기\n..."
                )
                excel_input = gr.File(
                    label="(선택) 엑셀 업로드",
                    file_types=[".xlsx"]
                )
                run_button = gr.Button("분석하기")

            # Right: output area.
            with gr.Column():
                output_md = gr.Markdown(label="결과 표")
                output_file = gr.File(label="결과 다운로드")

        # The inputs list is passed positionally, matching
        # count_keywords(main_text, excel_file, direct_input).
        run_button.click(
            fn=count_keywords,
            inputs=[main_textbox, excel_input, keyword_input],
            outputs=[output_md, output_file]
        )

    with gr.Tab("형태소 분석 기반 카운트"):
        with gr.Row():
            # Left: input area.
            with gr.Column():
                morph_text_input = gr.Textbox(
                    label="본문 텍스트",
                    lines=16,
                    placeholder="여기에 긴 본문을 붙여넣으세요."
                )
                morph_run_button = gr.Button("분석하기")

            # Right: output area.
            with gr.Column():
                morph_result_display = gr.Markdown(label="분석 결과")
                morph_download_button = gr.File(label="결과 다운로드")

        # morph_analysis_and_count returns (markdown, csv_path_or_None).
        morph_run_button.click(
            fn=morph_analysis_and_count,
            inputs=morph_text_input,
            outputs=[morph_result_display, morph_download_button]
        )


if __name__ == "__main__":
    demo.launch()