Spaces:
Sleeping
Sleeping
File size: 1,414 Bytes
0b61b2b 10af70f 0b61b2b 10af70f 238794e 0b61b2b 10af70f 0b61b2b 10af70f 0b61b2b 10af70f 0b61b2b 238794e 10af70f 0b61b2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import os
import re
import tempfile
from collections import Counter

import gradio as gr
import pandas as pd
def process_excel(file):
# ์์
ํ์ผ ์ฝ๊ธฐ
df = pd.read_excel(file)
# D์ด์ ๋ฐ์ดํฐ ์ถ์ถ
product_names = df.iloc[:, 3].dropna() # D์ด์ 0๋ถํฐ ์์ํ๋ฏ๋ก index๋ 3
# ํค์๋ ์ถ์ถ ๋ฐ ๋น๋ ๊ณ์ฐ
all_keywords = []
for name in product_names:
# ํน์๋ฌธ์ ์ ๊ฑฐ ๋ฐ ๊ณต๋ฐฑ ๊ธฐ์ค์ผ๋ก ๋ถํ
words = re.sub(r'[^\w\s]', '', name).split()
# ์ค๋ณต ์ ๊ฑฐ
unique_words = set(words)
all_keywords.extend(unique_words)
# ๋น๋ ๊ณ์ฐ
keyword_counts = Counter(all_keywords)
# ๊ฒฐ๊ณผ๋ฅผ ๋ฐ์ดํฐํ๋ ์์ผ๋ก ์ ๋ฆฌ
result_df = pd.DataFrame(keyword_counts.items(), columns=['Keyword', 'Frequency'])
result_df = result_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
# ์์
ํ์ผ๋ก ์ ์ฅ
output_file = "/mnt/data/keyword_counts.xlsx"
result_df.to_excel(output_file, index=False)
return output_file
# Define the Gradio interface: one uploaded file in, one generated file out.
iface = gr.Interface(
    fn=process_excel,
    inputs="file",
    outputs="file",
    title="Excel Keyword Extractor",
    # User-facing description (Korean): "Extracts keywords from column D of
    # the Excel file, counts their frequency, and outputs the result as a new
    # Excel file."
    description=(
        "엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 "
        "새로운 엑셀 파일로 출력합니다."
    ),
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|