Spaces:

SG34
/

test3

Sleeping

App Files Files Community

test3 / app.py

SG34

Update app.py

1dd8f10 verified 5 months ago

raw

history blame contribute delete

1.93 kB

	import re
	import io
	import pandas as pd
	from collections import Counter
	import gradio as gr

	# Matches every character that is NOT an ASCII digit, an ASCII letter,
	# a Hangul syllable, or whitespace; such characters are stripped from cells.
	_NON_KEYWORD_CHARS = re.compile(r'[^0-9a-zA-Z가-힣\s]')


	def _count_keywords(cells):
	    """Return a Counter of keyword -> number of cells that contain it."""
	    counts = Counter()
	    for cell in cells:
	        if pd.isna(cell):
	            continue
	        words = _NON_KEYWORD_CHARS.sub('', str(cell)).split()
	        # A keyword counts once per cell. dict.fromkeys de-duplicates while
	        # keeping first-seen order (a set would make the order of tied
	        # keywords depend on string hash randomisation); the value 1 is the
	        # increment Counter.update applies for each key of a mapping.
	        counts.update(dict.fromkeys(words, 1))
	    return counts


	def process_excel(file_obj):
	    """Build a keyword-frequency workbook from column D of an Excel sheet.

	    Cells of column D from sheet row 4 downwards are cleaned of punctuation,
	    split on whitespace, and every distinct keyword is counted once per cell.
	    The (keyword, count) pairs are written, most frequent first, to an
	    in-memory .xlsx file starting at row 4 with no header row.

	    Args:
	        file_obj: Path or file-like object accepted by ``pandas.read_excel``.

	    Returns:
	        A dict with the keys ``"name"``, ``"mime_type"`` and ``"data"``
	        (the workbook bytes), or an error message string when the sheet has
	        fewer than four columns.

	    NOTE(review): both return shapes are kept for compatibility, but a
	    ``gr.File`` output component may expect a file path instead -- confirm
	    against the installed Gradio version.
	    """
	    # header=None keeps the first sheet row as data instead of consuming it
	    # as column names, so positional row 3 below really is sheet row 4 (D4).
	    # With the default header handling the slice silently started at D5.
	    df = pd.read_excel(file_obj, header=None)

	    if len(df.columns) < 4:
	        return "엑셀 파일에 D열이 존재하지 않습니다."

	    # Row index 3 onwards, column index 3 (0-based) -> D4 to the end.
	    product_col = df.iloc[3:, 3]

	    # most_common() sorts by count, descending; ties keep first-seen order.
	    ranked = _count_keywords(product_col).most_common()
	    result_df = pd.DataFrame(ranked, columns=["키워드", "빈도"])

	    # Write the workbook into memory rather than onto disk.
	    output = io.BytesIO()
	    with pd.ExcelWriter(output, engine='openpyxl') as writer:
	        result_df.to_excel(writer, index=False, startrow=3, header=False)
	    output.seek(0)

	    # Dictionary describing the generated file for the caller.
	    return {
	        "name": "keyword_frequency.xlsx",
	        "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        "data": output.getvalue(),
	    }

	def on_submit(excel_file):
	    """Click handler: run the keyword analysis on the uploaded Excel file.

	    Args:
	        excel_file: Value delivered by the upload component. It may be
	            ``None`` when nothing was uploaded, an object exposing the
	            file path as ``.name``, or (presumably, depending on the Gradio
	            version / ``type`` setting -- verify) a plain path string.

	    Returns:
	        Whatever ``process_excel`` returns for the uploaded file, or
	        ``None`` when no file was provided.
	    """
	    # Clicking the button without an upload used to raise AttributeError.
	    if excel_file is None:
	        return None

	    # Use the .name path when present; otherwise treat the value as the path.
	    path = getattr(excel_file, "name", excel_file)
	    return process_excel(path)

	# Assemble the UI: a title, an upload slot, a download slot and a button
	# that feeds the uploaded file through on_submit into the download slot.
	with gr.Blocks() as demo:
	    gr.Markdown("## 상품명에서 키워드 추출하기")
	    excel_input = gr.File(label="엑셀 파일 업로드")
	    download_button = gr.File(label="결과 파일 다운로드")
	    run_button = gr.Button("분석 및 결과 생성")

	    # Wire the button: uploaded file in, generated result file out.
	    run_button.click(on_submit, inputs=[excel_input], outputs=[download_button])

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()