Spaces:

soojeongcrystal
/

hybridRAG

Sleeping

App Files Files Community

soojeongcrystal committed on Sep 8, 2024

Commit

2f93eb1

verified ·

1 Parent(s): 558d69b

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -15

app.py CHANGED Viewed

@@ -4,23 +4,25 @@ from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import csv
 import io
 # 한국어 처리를 위한 KoSentence-BERT 모델 로드
 model = SentenceTransformer('jhgan/ko-sbert-sts')
 # 전역 변수
 global_recommendations = None
-global_csv_string = None
 youtube_columns = None
-# CSV 문자열 생성 함수
-def create_csv_string(recommendations):
-    output = io.StringIO()
-    writer = csv.writer(output)
-    writer.writerow(["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
-    for rec in recommendations:
-        writer.writerow(rec)
-    return output.getvalue()
 # 열 매칭 함수
 def auto_match_columns(df, required_cols):
@@ -100,7 +102,7 @@ def match_youtube_content(program_skills, youtube_df, model):
 # 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블을 생성하는 함수
 def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
     global global_recommendations
-    global global_csv_string
     # 직원 및 프로그램 데이터 로드
     employee_df = pd.read_csv(employee_file.name)
@@ -138,6 +140,10 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
                         if 'title' in youtube_df.columns and 'url' in youtube_df.columns:
                             recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
         if recommended_programs:
             recommendation = f"직원 {employee[employee_cols['employee_name']]}의 추천 프로그램: {', '.join(recommended_programs)}"
             youtube_recommendation = f"추천 유튜브 콘텐츠: {', '.join(recommended_youtube)}" if recommended_youtube else "추천할 유튜브 콘텐츠가 없습니다."
@@ -153,13 +159,13 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
     global_recommendations = recommendation_rows
-    # CSV 문자열 생성
-    global_csv_string = create_csv_string(recommendation_rows)
     # 결과 테이블 데이터프레임 생성
     result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
-    return result_df, gr.File(value=global_csv_string, visible=True), gr.Button(value="CSV 다운로드", visible=True)
 # 채팅 응답 함수
 def chat_response(message, history):
@@ -175,8 +181,8 @@ def chat_response(message, history):
 # CSV 다운로드 함수
 def download_csv():
-    global global_csv_string
-    return gr.File(value=global_csv_string, visible=True)
 # Gradio 블록
 with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
@@ -221,5 +227,14 @@ with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .g
     msg.submit(chat_response, [msg, chatbot], [chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
 # Gradio 인터페이스 실행
 demo.launch()

 from sklearn.metrics.pairwise import cosine_similarity
 import csv
 import io
+import tempfile
+import os
 # 한국어 처리를 위한 KoSentence-BERT 모델 로드
 model = SentenceTransformer('jhgan/ko-sbert-sts')
 # 전역 변수
 global_recommendations = None
+global_csv_file = None
 youtube_columns = None
+# CSV 파일 생성 함수
+def create_csv_file(recommendations):
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as temp_file:
+        writer = csv.writer(temp_file)
+        writer.writerow(["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
+        for rec in recommendations:
+            writer.writerow(rec)
+    return temp_file.name
 # 열 매칭 함수
 def auto_match_columns(df, required_cols):
 # 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블을 생성하는 함수
 def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
     global global_recommendations
+    global global_csv_file
     # 직원 및 프로그램 데이터 로드
     employee_df = pd.read_csv(employee_file.name)
                         if 'title' in youtube_df.columns and 'url' in youtube_df.columns:
                             recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
+        # 추천 프로그램 및 유튜브 콘텐츠 개수 제한
+        recommended_programs = recommended_programs[:5]  # 최대 5개 프로그램만 추천
+        recommended_youtube = recommended_youtube[:3]  # 최대 3개 유튜브 콘텐츠만 추천
         if recommended_programs:
             recommendation = f"직원 {employee[employee_cols['employee_name']]}의 추천 프로그램: {', '.join(recommended_programs)}"
             youtube_recommendation = f"추천 유튜브 콘텐츠: {', '.join(recommended_youtube)}" if recommended_youtube else "추천할 유튜브 콘텐츠가 없습니다."
     global_recommendations = recommendation_rows
+    # CSV 파일 생성
+    global_csv_file = create_csv_file(recommendation_rows)
     # 결과 테이블 데이터프레임 생성
     result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
+    return result_df, gr.File(value=global_csv_file, visible=True), gr.Button(value="CSV 다운로드", visible=True)
 # 채팅 응답 함수
 def chat_response(message, history):
 # CSV 다운로드 함수
 def download_csv():
+    global global_csv_file
+    return gr.File(value=global_csv_file, visible=True)
 # Gradio 블록
 with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
     msg.submit(chat_response, [msg, chatbot], [chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
+# 프로그램 종료 시 임시 파일 삭제
+import atexit
+@atexit.register
+def cleanup():
+    global global_csv_file
+    if global_csv_file and os.path.exists(global_csv_file):
+        os.remove(global_csv_file)
 # Gradio 인터페이스 실행
 demo.launch()