Spaces:

ssboost
/

word_b

Running

App Files Community

ssboost committed 25 days ago

Commit

ddf4061

verified ·

1 Parent(s): 4a70618

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -255

app.py CHANGED Viewed

@@ -1,255 +1,2 @@
-import gradio as gr
-import pandas as pd
-import tempfile
-import re
-from mecab import MeCab
-##############################
-# 1) 공통 함수들
-##############################
-def preprocess_text(text: str) -> str:
-    """
-    쉼표, 마침표, 공백, 숫자, 영어 등
-    한글(가-힣) 이외의 문자를 모두 제거하고
-    한글만 연속으로 남긴다.
-    """
-    return re.sub(r'[^가-힣]', '', text)
-def expand_columns_if_needed(df, needed_index: int):
-    """
-    df에 (needed_index + 1)번째 열이 존재하지 않으면
-    임시로 확장해서 빈 열을 만든다.
-    예) needed_index=13 → N열(14번째 열)을 쓰려면
-       df.shape[1]이 14 이상이 되도록 확장
-    """
-    while df.shape[1] <= needed_index:
-        # 맨 끝에 빈 열 추가
-        df[df.shape[1]] = None
-##############################
-# 2) 키워드 카운트 함수
-##############################
-def count_keywords(main_text, excel_file, direct_input):
-    """
-    - 직접 입력 키워드(줄바꿈 구분)가 있으면 우선 사용(A열=키워드, B열=카운트)
-    - 없으면 엑셀 사용:
-      * 헤더를 사용하지 않음(header=None) → 1행 그대로 보존
-      * A5~A10000: 키워드
-      * N5~N10000: 카운트 기록(열 인덱스 13)
-    - 본문은 한글만 남기고 .count(키워드)로 등장 횟수를 계산
-    - 1회 이상인 키워드만 결과 표(Markdown)에 표시
-    """
-    # 본문 전처리
-    cleaned_text = preprocess_text(main_text)
-    direct_input = direct_input.strip()
-    if direct_input:
-        # ===== 직접 입력 키워드 사용 =====
-        keywords = [kw.strip() for kw in direct_input.split('\n') if kw.strip()]
-        if not keywords:
-            return ("직접 입력 키워드가 없습니다.", None)
-        # counts
-        counts = [cleaned_text.count(k) for k in keywords]
-        # 1회 이상 필터
-        filtered = [(k, c) for k, c in zip(keywords, counts) if c > 0]
-        if not filtered:
-            # 전부 0회
-            msg = "본문에 해당 키워드가 전혀 등장하지 않았습니다."
-            # 그래도 결과 엑셀(A,B) 만들어서 반환
-            tmp_df = pd.DataFrame({"A": keywords, "B": counts})
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
-                tmp_df.to_excel(tmp.name, index=False, header=False)
-                # header=False → 1행에 "A,B" 같은 열이름 안 쓰도록
-                tmp_path = tmp.name
-            return (msg, tmp_path)
-        # 1회 이상 표(Markdown)
-        lines = ["| 키워드 | 등장 횟수 |", "|---|---|"]
-        for (k, c) in filtered:
-            lines.append(f"| {k} | {c} |")
-        md_table = "\n".join(lines)
-        # 엑셀(A,B) 저장
-        tmp_df = pd.DataFrame({"A": keywords, "B": counts})
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
-            tmp_df.to_excel(tmp.name, index=False, header=False)
-            tmp_path = tmp.name
-        return (md_table, tmp_path)
-    else:
-        # ===== 엑셀 파일 사용 =====
-        if not excel_file:
-            return ("엑셀 파일을 업로드하거나 키워드를 직접 입력하세요.", None)
-        # 1) 엑셀 전체를 header=None로 읽음 → 1행 그대로 보존
-        df = pd.read_excel(excel_file.name, header=None)
-        # 2) A5~A10000 → (인덱스 4~9999) 키워드
-        max_row = min(df.shape[0], 10000)  # 실제 행 개수 vs 10000 중 더 작은 것
-        sub_df = df.iloc[4:max_row, 0]     # 첫 번째 열(인덱스=0)
-        # strip + NaN 제거
-        keywords = sub_df.dropna().astype(str).apply(lambda x: x.strip()).tolist()
-        if not keywords:
-            return ("A5~A10000 범위에 키워드가 없습니다.", None)
-        # counts
-        counts = [cleaned_text.count(k) for k in keywords]
-        # 1회 이상 필터
-        filtered = [(k, c) for k, c in zip(keywords, counts) if c > 0]
-        if not filtered:
-            msg = "본문에 해당 키워드가 전혀 등장하지 않았습니다(0회)."
-            # 그래도 N5~N10000에 기록
-            expand_columns_if_needed(df, 13)  # N열=13
-            for i, cnt_val in enumerate(counts):
-                row_idx = 4 + i
-                if row_idx < df.shape[0]:
-                    df.iloc[row_idx, 13] = cnt_val
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
-                df.to_excel(tmp.name, index=False, header=False)
-                tmp_path = tmp.name
-            return (msg, tmp_path)
-        # 1회 이상 표(Markdown)
-        lines = ["| 키워드 | 등장 횟수 |", "|---|---|"]
-        for (k, c) in filtered:
-            lines.append(f"| {k} | {c} |")
-        md_table = "\n".join(lines)
-        # N5~N10000에 기록
-        expand_columns_if_needed(df, 13)  # 열이 14개 미만이면 N열(13)까지 확장
-        for i, cnt_val in enumerate(counts):
-            row_idx = 4 + i
-            if row_idx < df.shape[0]:
-                df.iloc[row_idx, 13] = cnt_val
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
-            df.to_excel(tmp.name, index=False, header=False)
-            tmp_path = tmp.name
-        return (md_table, tmp_path)
-##############################
-# 3) 형태소 분석 기반 키워드 카운트 함수
-##############################
-def morph_analysis_and_count(text: str):
-    """
-    1) 입력된 텍스트에서 한글만 남김
-    2) Mecab 형태소 분석 (python-mecab-ko)
-    3) 명사 및 복합명사만 추출
-    4) 각 키워드를 본문에서 다시 검색하여 빈도 카운트
-    """
-    # 1) 전처리
-    cleaned = preprocess_text(text)
-    # 2) Mecab 분석
-    tagger = MeCab()
-    parsed = tagger.pos(cleaned)  # 예: [('초음파가습기', 'NNG'), ('효과', 'NNG'), ...]
-    # 3) 명사 및 복합명사만 추출
-    noun_tags = ['NNG', 'NNP', 'NP', 'NNB']  # 필요한 품사 태그
-    nouns = [word for (word, pos) in parsed if pos in noun_tags]
-    # 중복 제거하여 고유 키워드 리스트 생성
-    unique_nouns = list(set(nouns))
-    # 4) 각 키워드를 본문에서 검색하여 빈도 카운트
-    freq_dict = {}
-    for noun in unique_nouns:
-        count = cleaned.count(noun)
-        freq_dict[noun] = count
-    # 빈도수가 1 이상인 키워드만 필터링
-    filtered_freq = {k: v for k, v in freq_dict.items() if v > 0}
-    if not filtered_freq:
-        return "추출된 명사가 없습니다.", None
-    # 데이터프레임 생성 및 정렬
-    freq_df = pd.DataFrame(list(filtered_freq.items()), columns=['명사', '빈도'])
-    freq_df = freq_df.sort_values(by='빈도', ascending=False).reset_index(drop=True)
-    # 결과 표를 Markdown 형식으로 변환
-    try:
-        md_table = freq_df.to_markdown(index=False)
-    except ImportError:
-        md_table = "Markdown 변환을 위해 'tabulate' 라이브러리가 필요합니다."
-        return md_table, None
-    # CSV 파일로 저장
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
-        freq_df.to_csv(tmp.name, index=False, encoding='utf-8-sig')
-        tmp_path = tmp.name
-    return md_table, tmp_path
-########################
-# 4) Gradio 인터페이스 #
-########################
-with gr.Blocks() as demo:
-    with gr.Tab("키워드 카운트"):
-        with gr.Row():
-            # 왼쪽 입력 영역
-            with gr.Column():
-                main_textbox = gr.Textbox(
-                    label="본문 텍스트",
-                    lines=16,
-                    placeholder="여기에 긴 본문을 붙여넣으세요."
-                )
-                keyword_input = gr.Textbox(
-                    label="(선택) 직접 입력 키워드 - 엔터로 구분",
-                    lines=6,
-                    placeholder="예)\n초음파가습기\n가습기\n..."
-                )
-                excel_input = gr.File(
-                    label="(선택) 엑셀 업로드",
-                    file_types=[".xlsx"]
-                )
-                run_button = gr.Button("분석하기")
-            # 오른쪽 출력 영역
-            with gr.Column():
-                output_md = gr.Markdown(label="결과 표")
-                output_file = gr.File(label="결과 다운로드")
-        run_button.click(
-            fn=count_keywords,
-            inputs=[main_textbox, excel_input, keyword_input],
-            outputs=[output_md, output_file]
-        )
-    with gr.Tab("형태소 분석 기반 카운트"):
-        with gr.Row():
-            # 왼쪽 입력 영역
-            with gr.Column():
-                morph_text_input = gr.Textbox(
-                    label="본문 텍스트",
-                    lines=16,
-                    placeholder="여기에 긴 본문을 붙여넣으세요."
-                )
-                morph_run_button = gr.Button("분석하기")
-            # 오른쪽 출력 영역
-            with gr.Column():
-                morph_result_display = gr.Markdown(label="분석 결과")
-                morph_download_button = gr.File(label="결과 다운로드")
-        morph_run_button.click(
-            fn=morph_analysis_and_count,
-            inputs=morph_text_input,
-            outputs=[morph_result_display, morph_download_button]
-        )
-if __name__ == "__main__":
-    demo.launch()


1	+ import os
2	+ exec(os.environ.get('APP'))