|
import re |
|
import io |
|
import pandas as pd |
|
from collections import Counter |
|
import gradio as gr |
|
|
|
def process_excel(file_obj):
    """Count keyword frequencies in column D of an Excel sheet.

    The workbook is loaded with ``pandas.read_excel`` using its defaults, so
    the first row is consumed as the header.  The first three data rows are
    skipped; every remaining non-empty cell of the fourth column (column D)
    is cleaned by removing every character that is not a digit, an ASCII
    letter, a Hangul syllable or whitespace, and is then split on
    whitespace.  A keyword is counted at most once per cell.

    Args:
        file_obj: Anything ``pandas.read_excel`` accepts, e.g. a path or a
            binary file-like object.

    Returns:
        On success, a dict with the keys ``"name"`` (suggested file name),
        ``"mime_type"`` and ``"data"`` (bytes of an .xlsx workbook).  The
        workbook has no header row; keyword/count pairs start at the fourth
        spreadsheet row, ordered by descending count.  When the sheet has
        fewer than four columns, a Korean error message string is returned
        instead of a dict.
    """
    df = pd.read_excel(file_obj)

    # Column D is the fourth column; bail out early when it is missing.
    if len(df.columns) < 4:
        return "엑셀 파일에 D열이 존재하지 않습니다."

    # Compiled once here rather than on every cell.
    non_keyword_chars = re.compile(r'[^0-9a-zA-Z가-힣\s]')

    freq_counter = Counter()
    for cell_value in df.iloc[3:, 3]:
        if pd.isna(cell_value):
            continue
        # A numeric column that contains blanks is loaded as float64, so 123
        # arrives as 123.0.  Without this normalisation, stripping the "."
        # below would produce the bogus keyword "1230".
        if isinstance(cell_value, float) and cell_value.is_integer():
            cell_value = int(cell_value)
        keywords = non_keyword_chars.sub('', str(cell_value)).split()
        # set(): a keyword repeated inside one cell counts only once.
        freq_counter.update(set(keywords))

    # most_common() sorts by descending count; ties keep first-seen order.
    result_df = pd.DataFrame(
        freq_counter.most_common(),
        columns=["키워드", "빈도"],
    )

    # Serialise in memory; getvalue() returns the whole buffer regardless of
    # the current stream position.
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        result_df.to_excel(writer, index=False, startrow=3, header=False)

    return {
        "name": "keyword_frequency.xlsx",
        "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "data": output.getvalue(),
    }
|
|
|
def on_submit(excel_file):
    """Gradio click handler: analyse the uploaded workbook and return a file path.

    Args:
        excel_file: Value delivered by the ``gr.File`` input.  Either an
            object exposing the upload's path as ``.name`` or a plain path
            string is accepted; ``None`` means nothing was uploaded.

    Returns:
        Path (``str``) of a freshly written .xlsx file, suitable for a
        ``gr.File`` output component.

    Raises:
        gr.Error: If no file was uploaded, or if ``process_excel`` returned a
            message instead of a result dict.
    """
    # Function-scope imports keep this handler self-contained.
    import tempfile
    from pathlib import Path

    if excel_file is None:
        raise gr.Error("엑셀 파일을 업로드해 주세요.")

    # Use .name when the upload is a file-like wrapper, else the value itself.
    source = getattr(excel_file, "name", excel_file)

    result = process_excel(source)
    if not isinstance(result, dict):
        # process_excel reports problems as a plain message string.
        raise gr.Error(str(result))

    # A gr.File output takes a path, not raw bytes, so materialise the
    # workbook in a private temp directory under its suggested file name.
    target = Path(tempfile.mkdtemp()) / result["name"]
    target.write_bytes(result["data"])
    return str(target)
|
|
|
# Build the UI: a title, an upload slot, a download slot and a trigger button.
# Components are rendered in the order in which they are created.
with gr.Blocks() as demo:
    gr.Markdown("## 상품명에서 키워드 추출하기")
    excel_input = gr.File(label="엑셀 파일 업로드")
    download_button = gr.File(label="결과 파일 다운로드")
    run_button = gr.Button("분석 및 결과 생성")

    # Clicking the button passes the upload to on_submit and routes its
    # return value to the download slot.
    run_button.click(fn=on_submit, inputs=[excel_input], outputs=[download_button])


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|