Spaces:

Kims12
/

1_4_keyword

Sleeping

File size: 1,414 Bytes

0b61b2b
10af70f
0b61b2b
10af70f
238794e
0b61b2b
10af70f
0b61b2b
 
 
 
 
 
10af70f
0b61b2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10af70f
0b61b2b
 
 
 
 
 
 
 
 
 
 
238794e
10af70f
0b61b2b

import os
import re
import tempfile
from collections import Counter

import gradio as gr
import pandas as pd

def process_excel(file):
    """Count keyword frequencies in column D of an Excel sheet.

    Every non-empty cell of the fourth column (D) is stripped of punctuation
    and split on whitespace. A keyword is counted at most once per cell, so
    ``Frequency`` is the number of cells in which the keyword appears.

    Args:
        file: Whatever is handed to ``pandas.read_excel`` (it is passed
            through unchanged).

    Returns:
        Path of the generated ``keyword_counts.xlsx`` workbook. It has the
        columns ``Keyword`` and ``Frequency``, sorted by descending
        frequency with ties broken alphabetically by keyword.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    df = pd.read_excel(file)

    # Column D is positional index 3; fail with a readable message instead
    # of pandas' generic out-of-bounds error.
    if df.shape[1] < 4:
        raise IndexError(
            f"expected at least 4 columns to read column D, found {df.shape[1]}"
        )
    product_names = df.iloc[:, 3].dropna()

    punctuation = re.compile(r'[^\w\s]')
    keyword_counts = Counter()
    for name in product_names:
        # Cells may hold numbers or dates rather than text; coerce to str so
        # the regex substitution never raises TypeError.
        words = punctuation.sub('', str(name)).split()
        # A set collapses repeats so each cell contributes a keyword once.
        keyword_counts.update(set(words))

    result_df = pd.DataFrame(
        list(keyword_counts.items()), columns=['Keyword', 'Frequency']
    )
    # The secondary key makes the order of equally frequent keywords
    # deterministic (set iteration order is not).
    result_df = result_df.sort_values(
        by=['Frequency', 'Keyword'], ascending=[False, True]
    ).reset_index(drop=True)

    # Write into a fresh temporary directory: it always exists, is writable,
    # and keeps concurrent requests from overwriting each other's output.
    output_dir = tempfile.mkdtemp(prefix="keyword_counts_")
    output_file = os.path.join(output_dir, "keyword_counts.xlsx")
    result_df.to_excel(output_file, index=False)

    return output_file

# Gradio UI definition: one uploaded file in, one downloadable file out,
# both handled by process_excel.
iface = gr.Interface(
    title="Excel Keyword Extractor",
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산하여 새로운 엑셀 파일로 출력합니다.",
    fn=process_excel,
    inputs="file",
    outputs="file",
)

# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()