6-2_N_review / app.py
Kims12's picture
Update app.py
bd5cb51 verified
import gradio as gr
import pandas as pd
import openpyxl
import datetime
import tempfile
import os
from openpyxl.utils.dataframe import dataframe_to_rows
import openai
# ===================== OpenAI API ํด๋ผ์ด์–ธํŠธ ์„ค์ • =====================
openai.api_key = os.getenv("OPENAI_API_KEY")
def call_api(content, system_message, max_tokens, temperature, top_p):
"""
ChatGPT API ํ˜ธ์ถœ ํ•จ์ˆ˜.
"""
response = openai.ChatCompletion.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": system_message},
{"role": "user", "content": content}
],
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p
)
return response.choices[0].message['content']
def respond_gemini_qna(question, system_message, max_tokens, temperature, top_p, model_id):
"""
Gemini Flash ๋ชจ๋ธ์„ ์ด์šฉํ•ด ์งˆ๋ฌธ(question)์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ๋ฐ˜ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜.
"""
try:
import google.generativeai as genai
except ModuleNotFoundError:
return ("์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: 'google-generativeai' ๋ชจ๋“ˆ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. "
"ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•: 'pip install --upgrade google-generativeai' ๋ฅผ ์‹คํ–‰ํ•˜์—ฌ ์„ค์น˜ํ•ด์ฃผ์„ธ์š”.")
gemini_api_key = os.getenv("GEMINI_API_KEY")
if not gemini_api_key:
return "Gemini API ํ† ํฐ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค."
genai.configure(api_key=gemini_api_key)
prompt = f"{system_message}\n\n{question}"
try:
model = genai.GenerativeModel(model_name=model_id)
response = model.generate_content(prompt)
return response.text
except Exception as e:
return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
def respond_o1mini_qna(question, system_message, max_tokens, temperature):
"""
o1-mini ๋ชจ๋ธ์„ ์ด์šฉํ•ด ํ•œ ๋ฒˆ์˜ ์งˆ๋ฌธ(question)์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ๋ฐ˜ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜.
o1-mini์—์„œ๋Š” 'system' ๋ฉ”์‹œ์ง€๋ฅผ ์ง€์›ํ•˜์ง€ ์•Š์œผ๋ฏ€๋กœ system_message์™€ question์„ ํ•˜๋‚˜์˜ 'user' ๋ฉ”์‹œ์ง€๋กœ ํ•ฉ์ณ ์ „๋‹ฌํ•ฉ๋‹ˆ๋‹ค.
๋˜ํ•œ, o1-mini์—์„œ๋Š” 'max_tokens' ๋Œ€์‹  'max_completion_tokens'๋ฅผ ์‚ฌ์šฉํ•˜๋ฉฐ, temperature๋Š” ๊ณ ์ •๊ฐ’ 1๋งŒ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค.
"""
openai_token = os.getenv("OPENAI_API_KEY")
if not openai_token:
return "OpenAI API ํ† ํฐ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค."
openai.api_key = openai_token
combined_message = f"{system_message}\n\n{question}"
messages = [{"role": "user", "content": combined_message}]
try:
response = openai.ChatCompletion.create(
model="o1-mini",
messages=messages,
max_completion_tokens=max_tokens,
temperature=1, # ๊ณ ์ •๋œ ๊ฐ’ 1 ์‚ฌ์šฉ
)
assistant_message = response.choices[0].message['content']
return assistant_message
except Exception as e:
return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
# ===================== (1) ๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„: ๋ฏธ์…˜ 1~9 ์ฒ˜๋ฆฌ =====================
def analyze_options(uploaded_file, selected_year, llm_model_choice):
"""
์—…๋กœ๋“œ๋œ ํŒŒ์ผ๋กœ๋ถ€ํ„ฐ ๋ฏธ์…˜ 1~9(์˜ต์…˜/ํ‰์  ์ง‘๊ณ„)๋ฅผ ์ˆ˜ํ–‰ํ•˜๊ณ ,
์ž„์‹œ ์—‘์…€ ํŒŒ์ผ๊ณผ ์„ ํƒ๋…„๋„ Top20 ์˜ต์…˜ ๋ฆฌ์ŠคํŠธ(์ „์ฒด์˜ต์…˜๋ถ„์„ ํฌํ•จ)๋ฅผ ๋ฐ˜ํ™˜ํ•œ๋‹ค.
"""
if uploaded_file is None:
return None, ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
try:
df_upload = pd.read_excel(uploaded_file)
except Exception as e:
return None, ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
template_file = "๋ฆฌ๋ทฐ๋ถ„์„.ver1.0.xlsx"
if not os.path.exists(template_file):
return None, ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
try:
wb = openpyxl.load_workbook(template_file)
except Exception as e:
return None, ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
# (๋ฏธ์…˜1) ์›๋ณธ๋ฐ์ดํ„ฐ ์‹œํŠธ ์ƒ์„ฑ
if "์›๋ณธ๋ฐ์ดํ„ฐ" in wb.sheetnames:
ws_source = wb["์›๋ณธ๋ฐ์ดํ„ฐ"]
wb.remove(ws_source)
ws_source = wb.create_sheet("์›๋ณธ๋ฐ์ดํ„ฐ")
else:
ws_source = wb.create_sheet("์›๋ณธ๋ฐ์ดํ„ฐ")
for r_idx, row in enumerate(dataframe_to_rows(df_upload, index=False, header=True), start=1):
for c_idx, value in enumerate(row, start=1):
ws_source.cell(row=r_idx, column=c_idx, value=value)
# ๋ฆฌ๋ทฐ ๋‚ ์งœ ์ฒ˜๋ฆฌ
try:
df_upload['๋ฆฌ๋ทฐ๋‚ ์งœ'] = pd.to_datetime(df_upload.iloc[:, 1], errors='coerce')
df_upload['๋ฆฌ๋ทฐ๋‚ ์งœ'] = df_upload['๋ฆฌ๋ทฐ๋‚ ์งœ'].apply(
lambda d: d.replace(tzinfo=None) if pd.notnull(d) and d.tzinfo is not None else d
)
df_valid = df_upload.dropna(subset=['๋ฆฌ๋ทฐ๋‚ ์งœ']).copy()
except Exception as e:
return None, ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
now = datetime.datetime.now()
current_year = now.year
start_year_val = current_year - 2
end_year_val = current_year
# ๋Œ€์‹œ๋ณด๋“œ๋ฐ์ดํ„ฐ ์‹œํŠธ
if "๋Œ€์‹œ๋ณด๋“œ๋ฐ์ดํ„ฐ" in wb.sheetnames:
ws_dashboard = wb["๋Œ€์‹œ๋ณด๋“œ๋ฐ์ดํ„ฐ"]
else:
ws_dashboard = wb.create_sheet("๋Œ€์‹œ๋ณด๋“œ๋ฐ์ดํ„ฐ")
# (๋ฏธ์…˜2) ์ตœ๊ทผ 3๋…„ '๋…„์›”' ์ถ”์ด
ws_dashboard["C2"] = str(current_year)[-2:]
row_idx = 5
for year in range(start_year_val, end_year_val + 1):
for month in range(1, 13):
count = df_valid[(df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year == year) & (df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.month == month)].shape[0]
ws_dashboard.cell(row=row_idx, column=3, value=count)
row_idx += 1
# (๋ฏธ์…˜3) ์ตœ๊ทผ 3๋…„ '๋…„๋„' ์ถ”์ด
ws_dashboard["E2"] = str(current_year)[-2:]
row_year = 5
for y in [current_year, current_year - 1, current_year - 2]:
count = df_valid[df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year == y].shape[0]
ws_dashboard.cell(row=row_year, column=6, value=count)
row_year += 1
# (๋ฏธ์…˜4) ์„ ํƒ๋œ ๋…„๋„ '์›”๋ณ„' ์ถ”์ด
try:
selected_year_int = int("20" + selected_year[:2])
except Exception as e:
return None, ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
ws_dashboard["I2"] = selected_year[:2]
for month in range(1, 13):
count = df_valid[(df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year == selected_year_int) & (df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.month == month)].shape[0]
ws_dashboard.cell(row=4 + month, column=9, value=count)
# (๋ฏธ์…˜5) ์„ ํƒ๋œ ๋…„๋„ '์›”์ผ' ์ถ”์ด
start_date = datetime.date(selected_year_int, 1, 1)
end_date = datetime.date(selected_year_int, 12, 31)
df_selected = df_valid[df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year == selected_year_int].copy()
df_selected['๋ฆฌ๋ทฐ๋‚ ์งœ_์ผ'] = df_selected['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.date
daily_counts = df_selected.groupby('๋ฆฌ๋ทฐ๋‚ ์งœ_์ผ').size().to_dict()
row_day = 5
current_day = start_date
while current_day <= end_date:
ws_dashboard.cell(row=row_day, column=12, value=daily_counts.get(current_day, 0))
row_day += 1
current_day += datetime.timedelta(days=1)
# (๋ฏธ์…˜6) ์ตœ๊ทผ 3๋…„ ์ „์ฒด์˜ต์…˜ ๋ฐ์ดํ„ฐ ์ž…๋ ฅ
df_recent = df_valid[(df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year >= start_year_val) & (df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year <= end_year_val)].copy()
if df_recent.shape[1] >= 3:
df_recent['์˜ต์…˜์›๋ณธ'] = df_recent.iloc[:, 2].astype(str).fillna('')
def get_first_option(opt_str):
return opt_str.split(' / ')[0].strip()
df_recent['์˜ต์…˜1'] = df_recent['์˜ต์…˜์›๋ณธ'].apply(get_first_option)
option_counts_3y = df_recent['์˜ต์…˜1'].value_counts()
top20_3y = option_counts_3y.head(20)
sum_others_3y = option_counts_3y.iloc[20:].sum() if len(option_counts_3y) > 20 else 0
row_opt = 5
for opt_name, cnt in top20_3y.items():
ws_dashboard.cell(row=row_opt, column=14, value=opt_name)
ws_dashboard.cell(row=row_opt, column=15, value=cnt)
row_opt += 1
ws_dashboard.cell(row=25, column=15, value=sum_others_3y)
else:
ws_dashboard.cell(row=5, column=14, value="๊ตฌ๋งค์˜ต์…˜ ์—ด์ด ์—†์Šต๋‹ˆ๋‹ค.")
ws_dashboard.cell(row=5, column=15, value=0)
# (๋ฏธ์…˜7) ์„ ํƒ๋…„๋„ ๊ธฐ์ค€ ์˜ต์…˜ ๋ฐ์ดํ„ฐ (๋‚ด๋ฆผ์ฐจ์ˆœ)
df_selected_year = df_valid[df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year == selected_year_int].copy()
if df_selected_year.shape[1] >= 3:
df_selected_year['์˜ต์…˜์›๋ณธ'] = df_selected_year.iloc[:, 2].astype(str).fillna('')
def get_first_option(opt_str):
return opt_str.split(' / ')[0].strip()
df_selected_year['์˜ต์…˜1'] = df_selected_year['์˜ต์…˜์›๋ณธ'].apply(get_first_option)
option_counts_selected = df_selected_year['์˜ต์…˜1'].value_counts()
top20_selected = option_counts_selected.head(20)
sum_others_selected = option_counts_selected.iloc[20:].sum() if len(option_counts_selected) > 20 else 0
row_opt = 5
for opt_name, cnt in top20_selected.items():
ws_dashboard.cell(row=row_opt, column=18, value=opt_name)
ws_dashboard.cell(row=row_opt, column=19, value=cnt)
row_opt += 1
ws_dashboard.cell(row=25, column=19, value=sum_others_selected)
else:
ws_dashboard.cell(row=5, column=18, value="๊ตฌ๋งค์˜ต์…˜ ์—ด์ด ์—†์Šต๋‹ˆ๋‹ค.")
ws_dashboard.cell(row=5, column=19, value=0)
# (๋ฏธ์…˜8) ์ตœ๊ทผ3๋…„ ํ‰์ ํ˜„ํ™ฉ - top 20 ์˜ต์…˜
df_recent_score = df_valid[(df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year >= start_year_val) & (df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year <= end_year_val)].copy()
if df_recent_score.shape[1] >= 5:
df_recent_score['์˜ต์…˜์›๋ณธ'] = df_recent_score.iloc[:, 2].astype(str).fillna('')
def get_first_option(opt_str):
return opt_str.split(' / ')[0].strip()
df_recent_score['์˜ต์…˜1'] = df_recent_score['์˜ต์…˜์›๋ณธ'].apply(get_first_option)
df_recent_score['ํ‰์ '] = pd.to_numeric(df_recent_score.iloc[:, 4], errors='coerce')
df_recent_score = df_recent_score.dropna(subset=['ํ‰์ '])
group_8 = df_recent_score.groupby(['์˜ต์…˜1', 'ํ‰์ ']).size().unstack(fill_value=0)
group_8 = group_8.reindex(columns=[5,4,3,2,1], fill_value=0)
group_8['total_count'] = group_8.sum(axis=1)
group_8 = group_8.sort_values(by='total_count', ascending=False)
top20_8 = group_8.head(20).copy()
others_8 = group_8.iloc[20:].copy()
sum_others_5 = others_8[5].sum()
sum_others_4 = others_8[4].sum()
sum_others_3 = others_8[3].sum()
sum_others_2 = others_8[2].sum()
sum_others_1 = others_8[1].sum()
row_v = 5
for opt_name, row_data in top20_8.iterrows():
ws_dashboard.cell(row=row_v, column=22, value=opt_name)
ws_dashboard.cell(row=row_v, column=24, value=row_data[5])
ws_dashboard.cell(row=row_v, column=25, value=row_data[4])
ws_dashboard.cell(row=row_v, column=26, value=row_data[3])
ws_dashboard.cell(row=row_v, column=27, value=row_data[2])
ws_dashboard.cell(row=row_v, column=28, value=row_data[1])
row_v += 1
ws_dashboard.cell(row=25, column=24, value=sum_others_5)
ws_dashboard.cell(row=25, column=25, value=sum_others_4)
ws_dashboard.cell(row=25, column=26, value=sum_others_3)
ws_dashboard.cell(row=25, column=27, value=sum_others_2)
ws_dashboard.cell(row=25, column=28, value=sum_others_1)
else:
ws_dashboard.cell(row=5, column=22, value="์˜ต์…˜ ๋˜๋Š” ํ‰์  ์—ด์ด ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค.")
# (๋ฏธ์…˜9) ์„ ํƒ๋…„๋„ ํ‰์ ํ˜„ํ™ฉ - top 20 ์˜ต์…˜
df_selected_score = df_valid[df_valid['๋ฆฌ๋ทฐ๋‚ ์งœ'].dt.year == selected_year_int].copy()
if df_selected_score.shape[1] >= 5:
df_selected_score['์˜ต์…˜์›๋ณธ'] = df_selected_score.iloc[:, 2].astype(str).fillna('')
def get_first_option(opt_str):
return opt_str.split(' / ')[0].strip()
df_selected_score['์˜ต์…˜1'] = df_selected_score['์˜ต์…˜์›๋ณธ'].apply(get_first_option)
df_selected_score['ํ‰์ '] = pd.to_numeric(df_selected_score.iloc[:, 4], errors='coerce')
df_selected_score = df_selected_score.dropna(subset=['ํ‰์ '])
group_9 = df_selected_score.groupby(['์˜ต์…˜1', 'ํ‰์ ']).size().unstack(fill_value=0)
group_9 = group_9.reindex(columns=[5,4,3,2,1], fill_value=0)
group_9['total_count'] = group_9.sum(axis=1)
group_9 = group_9.sort_values(by='total_count', ascending=False)
top20_9 = group_9.head(20).copy()
others_9 = group_9.iloc[20:].copy()
sum_others_5 = others_9[5].sum()
sum_others_4 = others_9[4].sum()
sum_others_3 = others_9[3].sum()
sum_others_2 = others_9[2].sum()
sum_others_1 = others_9[1].sum()
row_ad = 5
for opt_name, row_data in top20_9.iterrows():
ws_dashboard.cell(row=row_ad, column=30, value=opt_name)
ws_dashboard.cell(row=row_ad, column=32, value=row_data[5])
ws_dashboard.cell(row=row_ad, column=33, value=row_data[4])
ws_dashboard.cell(row=row_ad, column=34, value=row_data[3])
ws_dashboard.cell(row=row_ad, column=35, value=row_data[2])
ws_dashboard.cell(row=row_ad, column=36, value=row_data[1])
row_ad += 1
ws_dashboard.cell(row=25, column=32, value=sum_others_5)
ws_dashboard.cell(row=25, column=33, value=sum_others_4)
ws_dashboard.cell(row=25, column=34, value=sum_others_3)
ws_dashboard.cell(row=25, column=35, value=sum_others_2)
ws_dashboard.cell(row=25, column=36, value=sum_others_1)
else:
ws_dashboard.cell(row=5, column=30, value="์˜ต์…˜ ๋˜๋Š” ํ‰์  ์—ด์ด ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค.")
# ์ž„์‹œ ์—‘์…€ ํŒŒ์ผ ์ €์žฅ
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
wb.save(temp_file.name)
temp_file.close()
# ์„ ํƒ๋…„๋„ Top20 ์˜ต์…˜ ๋ฆฌ์ŠคํŠธ ์ƒ์„ฑ (์ „์ฒด์˜ต์…˜๋ถ„์„ ํฌํ•จ)
top20_option_list = ["์ „์ฒด์˜ต์…˜๋ถ„์„"]
if df_selected_year.shape[1] >= 3:
option_counts_selected = df_selected_year['์˜ต์…˜1'].value_counts().head(20)
for opt_name, cnt in option_counts_selected.items():
top20_option_list.append(f"{opt_name}({cnt})")
return temp_file.name, top20_option_list
# ===================== (2) ๋ฆฌ๋ทฐ๋ถ„์„: ๋ฏธ์…˜ 10~12 ์‹คํ–‰ =====================
def analyze_reviews(partial_file, selected_option, llm_model_choice):
"""
'๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„' ๊ฒฐ๊ณผ ์—‘์…€๊ณผ ์„ ํƒ ์˜ต์…˜(๋˜๋Š” ์ „์ฒด์˜ต์…˜๋ถ„์„)์„ ๋ฐ”ํƒ•์œผ๋กœ
๋ฏธ์…˜ 10~12(์ฃผ์š”๊ธ์ •/๋ถ€์ • ๋ฆฌ๋ทฐ ์ถ”์ถœ + LLM ๋ถ„์„)๋ฅผ ์ˆ˜ํ–‰ํ•˜๊ณ ,
์ตœ์ข… ์—‘์…€ ํŒŒ์ผ๊ณผ ํ…์ŠคํŠธ ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜ํ•œ๋‹ค.
"""
if partial_file is None:
return None, "", "", "", ""
try:
wb = openpyxl.load_workbook(partial_file)
except Exception as e:
return None, "", "", "", ""
if "์›๋ณธ๋ฐ์ดํ„ฐ" not in wb.sheetnames:
return None, "", "", "", ""
ws_source = wb["์›๋ณธ๋ฐ์ดํ„ฐ"]
data = ws_source.values
cols = next(data)
df_upload = pd.DataFrame(data, columns=cols)
# (๋ฏธ์…˜10) ์ฃผ์š” ๋ฆฌ๋ทฐ ๋‚ด์šฉ ์‹œํŠธ ์ƒ์„ฑ
if "์ฃผ์š” ๋ฆฌ๋ทฐ ๋‚ด์šฉ" in wb.sheetnames:
ws_review = wb["์ฃผ์š” ๋ฆฌ๋ทฐ ๋‚ด์šฉ"]
wb.remove(ws_review)
ws_review = wb.create_sheet("์ฃผ์š” ๋ฆฌ๋ทฐ ๋‚ด์šฉ")
else:
ws_review = wb.create_sheet("์ฃผ์š” ๋ฆฌ๋ทฐ ๋‚ด์šฉ")
if df_upload.shape[1] < 5:
ws_review.cell(row=1, column=1, value="๋ฆฌ๋ทฐ๋‚ด์šฉ ๋ฐ ํ‰์  ์—ด์ด ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค.")
return None, "", "", "", ""
df_upload['์˜ต์…˜์›๋ณธ'] = df_upload.iloc[:, 2].astype(str).fillna('')
if selected_option == "์ „์ฒด์˜ต์…˜๋ถ„์„":
df_filtered = df_upload.copy()
else:
selected_opt_name = selected_option.rsplit("(", 1)[0].strip()
def get_first_option(opt_str):
return opt_str.split(' / ')[0].strip()
df_upload['์˜ต์…˜1'] = df_upload['์˜ต์…˜์›๋ณธ'].apply(get_first_option)
df_filtered = df_upload[df_upload['์˜ต์…˜1'] == selected_opt_name].copy()
# (๋ฏธ์…˜10) ์ฃผ์š” ๋ฆฌ๋ทฐ ๋‚ด์šฉ(ํ‰์ =3 ์ œ์™ธ) + ๊ธ€์ž์ˆ˜ ์—ด ์ƒ์„ฑ โ†’ ๋‚ด๋ฆผ์ฐจ์ˆœ ์ •๋ ฌ
df_review = df_filtered.copy()
df_review = df_review[df_review.iloc[:,4] != 3]
df_review['๊ธ€์ž์ˆ˜'] = df_review.iloc[:,3].astype(str).apply(len)
df_review = df_review.sort_values(by='๊ธ€์ž์ˆ˜', ascending=False)
for r_idx, row in enumerate(dataframe_to_rows(df_review, index=False, header=True), start=1):
for c_idx, value in enumerate(row, start=1):
ws_review.cell(row=r_idx, column=c_idx, value=value)
# (๋ฏธ์…˜11) ๊ธ์ •/๋ถ€์ • ๋ฆฌ๋ทฐ ์ถ”์ถœ
df_positive = df_review[(df_review.iloc[:,4] == 5) & (df_review['๊ธ€์ž์ˆ˜'] < 500)].copy()
if df_positive.shape[0] < 20:
df_positive_4 = df_review[(df_review.iloc[:,4] == 4) & (df_review['๊ธ€์ž์ˆ˜'] < 500)]
df_positive = pd.concat([df_positive, df_positive_4])
df_positive = df_positive.head(20)
positive_reviews = ""
for idx, row in df_positive.iterrows():
positive_reviews += f"์•„์ด๋””: {row.iloc[0]}, ์ ์ˆ˜: {row.iloc[4]}, ๊ธ€์ž์ˆ˜: {row['๊ธ€์ž์ˆ˜']}\n๋ฆฌ๋ทฐ: {row.iloc[3]}\n\n"
df_negative = df_review[(df_review.iloc[:,4] == 1) & (df_review['๊ธ€์ž์ˆ˜'] < 500)].copy()
if df_negative.shape[0] < 30:
df_negative_2 = df_review[(df_review.iloc[:,4] == 2) & (df_review['๊ธ€์ž์ˆ˜'] < 500)]
df_negative = pd.concat([df_negative, df_negative_2])
df_negative = df_negative.head(30)
negative_reviews = ""
for idx, row in df_negative.iterrows():
negative_reviews += f"์•„์ด๋””: {row.iloc[0]}, ์ ์ˆ˜: {row.iloc[4]}, ๊ธ€์ž์ˆ˜: {row['๊ธ€์ž์ˆ˜']}\n๋ฆฌ๋ทฐ: {row.iloc[3]}\n\n"
# (๋ฏธ์…˜12) LLM์„ ์ด์šฉํ•œ ๋ฆฌ๋ทฐ ๋ถ„์„
positive_system_msg = (
"๐Ÿ“ ๊ธ์ •๋ฆฌ๋ทฐ ๋ถ„์„:\n"
"๋„ˆ๋Š” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜๋Š” ๋น…๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ด๋‹ค. ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๊ธ์ •์ ์ธ ์˜๊ฒฌ๋งŒ์„ ๋ถ„์„ํ•ด๋ผ. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ๋งŒ ๋ถ„์„ํ•˜๋ฉฐ, ๋„ˆ์˜ ์ƒ๊ฐ์€ ํฌํ•จํ•˜์ง€ ๋ง ๊ฒƒ.\n"
"[๋ถ„์„ ์กฐ๊ฑด]\n"
"- ๋ฐ˜๋“œ์‹œ ๊ธ์ •์ ์ธ ์˜๊ฒฌ๋งŒ ๋ถ„์„ํ•˜๊ณ , ๋ถ€์ •์ ์ธ ์˜๊ฒฌ์€ ์ œ์™ธํ•  ๊ฒƒ.\n"
"- ๊ธฐ๋Šฅ ๋ฐ ์„ฑ๋Šฅ, ๊ฐ์„ฑ, ์‹ค์ œ ์‚ฌ์šฉ, ๋ฐฐ์†ก, ํƒ€๊ฒŸ๋ณ„ ๊ด€์ ์œผ๋กœ ๋ถ„์„ํ•  ๊ฒƒ.\n"
"- ๋งˆ์ผ€ํŒ…์— ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๊ณ ๊ฐ์˜ ์‹ค์ œ ๋ฆฌ๋ทฐ ๋‹จ์–ด๋ฅผ ๋ฐ˜๋“œ์‹œ ํฌํ•จํ•  ๊ฒƒ.\n"
"[์ถœ๋ ฅ ํ˜•ํƒœ]\n"
"- ๊ฐ๊ฐ์˜ ์ œ๋ชฉ ์•ž์—๋Š” '๐Ÿ“' ์ด๋ชจ์ง€๋ฅผ ์‚ฌ์šฉํ•˜๋ฉฐ, '#'๋‚˜ '##'์€ ์‚ฌ์šฉํ•˜์ง€ ๋ง ๊ฒƒ.\n"
"- ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์—๋Š” \"๐Ÿ†์ข…ํ•ฉ์˜๊ฒฌ\"์ด๋ผ๋Š” ์ œ๋ชฉ์œผ๋กœ ์ข…ํ•ฉ ์˜๊ฒฌ์„ ์ž‘์„ฑํ•˜๋ผ.\n"
" - ์ข…ํ•ฉ์˜๊ฒฌ์—๋Š” ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๊ณ  ์„œ์ˆ ์‹ ๋ฌธ์žฅ์œผ๋กœ ์ž‘์„ฑํ•  ๊ฒƒ.\n"
" - ์ด์–ด์„œ '๐Ÿน ๊ฐ•์ '๊ณผ '๐Ÿน ๊ธฐํšŒ' ์ œ๋ชฉ์œผ๋กœ SWOT ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ์ œ๊ณตํ•  ๊ฒƒ.\n"
"- ์‹ค์ œ ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ์—์„œ ์‚ฌ์šฉ๋œ ๋‹จ์–ด๋ฅผ ๋ฐ˜๋“œ์‹œ ํฌํ•จํ•  ๊ฒƒ."
)
positive_user_msg = "๋‹ค์Œ ๋ฆฌ๋ทฐ๋ฅผ ๋ถ„์„ํ•ด ์ฃผ์„ธ์š”:\n" + positive_reviews
negative_system_msg = (
"๐Ÿ“ ๋ถ€์ •๋ฆฌ๋ทฐ ๋ถ„์„:\n"
"๋„ˆ๋Š” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜๋Š” ๋น…๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ด๋‹ค. ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋ถ€์ •์ ์ธ ์˜๊ฒฌ๋งŒ์„ ๋ถ„์„ํ•ด๋ผ. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ๋งŒ ๋ถ„์„ํ•˜๋ฉฐ, ๋„ˆ์˜ ์ƒ๊ฐ์€ ํฌํ•จํ•˜์ง€ ๋ง ๊ฒƒ.\n"
"[๋ถ„์„ ์กฐ๊ฑด]\n"
"- ๋ถ€์ •์ ์ธ ์˜๊ฒฌ๋งŒ ๋ถ„์„ํ•˜๊ณ , ๊ธ์ •์ ์ธ ์˜๊ฒฌ์€ ์ œ์™ธํ•  ๊ฒƒ.\n"
"- ๊ธฐ๋Šฅ ๋ฐ ์„ฑ๋Šฅ, ๊ฐ์„ฑ, ์‹ค์ œ ์‚ฌ์šฉ, ๋ฐฐ์†ก, ๊ณ ๊ฐ์˜ ๋ถ„๋…ธ ๊ด€์ ์œผ๋กœ ๋ถ„์„ํ•  ๊ฒƒ.\n"
"- ๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ '๊ฐœ์„ ํ•  ์ '์„ ์ถœ๋ ฅํ•  ๊ฒƒ.\n"
"[์ถœ๋ ฅ ํ˜•ํƒœ]\n"
"- ๊ฐ๊ฐ์˜ ์ œ๋ชฉ ์•ž์—๋Š” '๐Ÿ“' ์ด๋ชจ์ง€๋ฅผ ์‚ฌ์šฉํ•˜๋ฉฐ, '#'๋‚˜ '##'์€ ์‚ฌ์šฉํ•˜์ง€ ๋ง ๊ฒƒ.\n"
"- ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์—๋Š” \"๐Ÿ“ข๊ฐœ์„ ํ•  ์ \"์ด๋ผ๋Š” ์ œ๋ชฉ์œผ๋กœ ๊ฐœ์„ ํ•  ์ ์„ ์ž‘์„ฑํ•˜๋ผ.\n"
" - ๊ฐœ์„ ํ•  ์ ์—๋Š” ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๊ณ  ์„œ์ˆ ์‹ ๋ฌธ์žฅ์œผ๋กœ ์ž‘์„ฑํ•  ๊ฒƒ.\n"
" - ์ด์–ด์„œ '๐Ÿ’‰ ์•ฝ์ '๊ณผ '๐Ÿ’‰ ์œ„ํ˜‘' ์ œ๋ชฉ์œผ๋กœ SWOT ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ์ œ๊ณตํ•  ๊ฒƒ.\n"
"- ์‹ค์ œ ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ์—์„œ ์‚ฌ์šฉ๋œ ๋‹จ์–ด๋ฅผ ๋ฐ˜๋“œ์‹œ ํฌํ•จํ•  ๊ฒƒ."
)
negative_user_msg = "๋‹ค์Œ ๋ฆฌ๋ทฐ๋ฅผ ๋ถ„์„ํ•ด ์ฃผ์„ธ์š”:\n" + negative_reviews
if llm_model_choice == "ChatGPT (gpt-4o-mini)":
positive_analysis = call_api(
positive_user_msg, positive_system_msg,
max_tokens=15000, temperature=0.3, top_p=0.95
)
negative_analysis = call_api(
negative_user_msg, negative_system_msg,
max_tokens=15000, temperature=0.3, top_p=0.95
)
elif llm_model_choice == "Gemini Flash (gemini-2.0-flash)":
positive_analysis = respond_gemini_qna(
positive_user_msg, positive_system_msg,
max_tokens=15000, temperature=0.3, top_p=0.95,
model_id="gemini-2.0-flash"
)
negative_analysis = respond_gemini_qna(
negative_user_msg, negative_system_msg,
max_tokens=15000, temperature=0.3, top_p=0.95,
model_id="gemini-2.0-flash"
)
elif llm_model_choice == "o1-mini":
positive_analysis = respond_o1mini_qna(
positive_user_msg, positive_system_msg,
max_tokens=15000, temperature=1
)
negative_analysis = respond_o1mini_qna(
negative_user_msg, negative_system_msg,
max_tokens=15000, temperature=1
)
else:
positive_analysis = "LLM ๋ชจ๋ธ ์„ ํƒ ์˜ค๋ฅ˜"
negative_analysis = "LLM ๋ชจ๋ธ ์„ ํƒ ์˜ค๋ฅ˜"
if "๋Œ€์‹œ๋ณด๋“œ๋ฐ์ดํ„ฐ" in wb.sheetnames:
ws_dashboard = wb["๋Œ€์‹œ๋ณด๋“œ๋ฐ์ดํ„ฐ"]
ws_dashboard["AL5"] = positive_analysis
ws_dashboard["AM5"] = negative_analysis
final_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
wb.save(final_file.name)
final_file.close()
return final_file.name, positive_reviews, negative_reviews, positive_analysis, negative_analysis
# ===================== Gradio UI ๊ตฌ์„ฑ (HTML/CSS ์ปค์Šคํ„ฐ๋งˆ์ด์ง• ์ ์šฉ) =====================
custom_css = """
body {
background: #f7f7f7;
font-family: 'Arial', sans-serif;
}
.gradio-container {
padding: 20px;
}
.custom-header {
text-align: center;
font-size: 36px;
font-weight: bold;
margin-bottom: 20px;
color: #333;
}
.custom-frame {
border: 2px solid #ccc;
border-radius: 20px;
padding: 20px;
margin: 10px 0;
background-color: #fff;
}
.custom-button {
border-radius: 20px !important;
background: #4caf50 !important;
color: white !important;
font-size: 20px !important;
padding: 10px 20px !important;
}
.custom-title {
font-size: 28px;
font-weight: bold;
margin-bottom: 10px;
color: #555;
}
.custom-subtitle {
font-size: 22px;
font-weight: bold;
margin-bottom: 8px;
color: #666;
}
.instructions {
background-color: #e0f7fa;
border: 2px dashed #00796b;
border-radius: 15px;
padding: 15px;
font-size: 18px;
color: #004d40;
margin-bottom: 20px;
text-align: left;
}
"""
# ๊ฐ„๋‹จํ•˜๊ณ  ์ขŒ์ธก ์ •๋ ฌ๋œ ๋‹จ๊ณ„๋ณ„ ์‚ฌ์šฉ์„ค๋ช…์„œ (๋จผ์ € ์ •์˜)
usage_instructions = """
<div class="instructions">
<p style="font-size:24px; font-weight:bold;">๐Ÿ“– ์‚ฌ์šฉ์„ค๋ช…์„œ</p>
<p style="font-size:16px;">
1๋‹จ๊ณ„: ์ขŒ์ธก "๐Ÿ“‘ ๋ฐ์ดํ„ฐ ์ž…๋ ฅ"์—์„œ ํŒŒ์ผ ์—…๋กœ๋“œ์™€ ๋ถ„์„๋…„๋„ ์„ ํƒ ํ›„, ๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„ ๋ฒ„ํŠผ ํด๋ฆญ<br>
2๋‹จ๊ณ„: ํ•˜๋‹จ "๐Ÿ“‘ ๋ฆฌ๋ทฐ๋ถ„์„ ์˜ต์…˜์„ ํƒ"์—์„œ LLM ๋ชจ๋ธ๊ณผ ์•„์ดํ…œ์˜ต์…˜ ์„ ํƒ ํ›„, ๋ฆฌ๋ทฐ๋ถ„์„ ์˜ต์…˜์„ ํƒ ๋ฒ„ํŠผ ํด๋ฆญ<br>
3๋‹จ๊ณ„: ์šฐ์ธก "๐Ÿ“‘๋ถ„์„๋ณด๊ณ ์„œ ๋‹ค์šด๋กœ๋“œ"์—์„œ ๋ณด๊ณ ์„œ๋ฅผ ๋‹ค์šด๋กœ๋“œํ•˜๊ณ  ๊ฒฐ๊ณผ ํ™•์ธ<br>
</p>
</div>
"""
with gr.Blocks(css=custom_css, title="๋ฆฌ๋ทฐ ๋ถ„์„ ์„œ๋น„์Šค") as demo:
gr.HTML("<div class='custom-header'>๐ŸŒŸ ๊ณ ๊ฐ ๋ฆฌ๋ทฐ ๋ถ„์„ ์„œ๋น„์Šค ๐ŸŒŸ</div>")
gr.HTML(usage_instructions)
# [๋ฐ์ดํ„ฐ ์ž…๋ ฅ ๋ฐ ๋ถ„์„๋ณด๊ณ ์„œ ๋‹ค์šด๋กœ๋“œ ํ”„๋ ˆ์ž„] (์ขŒ/์šฐ ๋ฐฐ์น˜)
with gr.Row():
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-title'>๐Ÿ“‘ ๋ฐ์ดํ„ฐ ์ž…๋ ฅ</div>")
file_input = gr.File(label="์›๋ณธ ์—‘์…€ ํŒŒ์ผ ์—…๋กœ๋“œ", file_types=[".xlsx"])
year_radio = gr.Radio(
choices=[f"{str(y)[-2:]}๋…„" for y in range(datetime.datetime.now().year, datetime.datetime.now().year-5, -1)],
label="๋ถ„์„๋…„๋„ ์„ ํƒ",
value=f"{str(datetime.datetime.now().year)[-2:]}๋…„"
)
analyze_button = gr.Button("๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„", elem_classes="custom-button")
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-title'>๐Ÿ“‘๋ถ„์„๋ณด๊ณ ์„œ ๋‹ค์šด๋กœ๋“œ</div>")
download_final_output = gr.File(label="๋ณด๊ณ ์„œ ๋‹ค์šด๋กœ๋“œ")
# [๋ฆฌ๋ทฐ๋ถ„์„ ํ”„๋ ˆ์ž„] (๋ฐ์ดํ„ฐ ์ž…๋ ฅ ํ”„๋ ˆ์ž„๊ณผ ๋ณ„๋„; ์ดˆ๊ธฐ์—๋Š” ์ˆจ๊น€)
with gr.Column(elem_classes="custom-frame", visible=False) as review_analysis_frame:
gr.HTML("<div class='custom-title'>๋ฆฌ๋ทฐ๋ถ„์„ ์˜ต์…˜์„ ํƒ</div>")
llm_model_radio = gr.Radio(
choices=["ChatGPT (gpt-4o-mini)", "Gemini Flash (gemini-2.0-flash)", "o1-mini"],
label="LLM ๋ชจ๋ธ ์„ ํƒ",
value="ChatGPT (gpt-4o-mini)"
)
top20_dropdown = gr.Dropdown(
label="์•„์ดํ…œ์˜ต์…˜ ๋ถ„์„",
choices=["์ „์ฒด์˜ต์…˜๋ถ„์„"],
value="์ „์ฒด์˜ต์…˜๋ถ„์„"
)
review_button = gr.Button("๐Ÿ“‘ ๋ฆฌ๋ทฐ๋ถ„์„ ์˜ต์…˜์„ ํƒ", elem_classes="custom-button")
# [๋ถ„์„ ๊ฒฐ๊ณผ ํ”„๋ ˆ์ž„] - ์ƒ๋‹จ ์ œ๋ชฉ ์ถ”๊ฐ€: "๐Ÿ“‘ ์˜ต์…˜๋ณ„ ๋ฆฌ๋ทฐ๋ถ„์„"
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-title'>๐Ÿ“‘ ์˜ต์…˜๋ณ„ ๋ฆฌ๋ทฐ๋ถ„์„</div>")
with gr.Row():
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-subtitle'>โœจ ์ฃผ์š”๊ธ์ •๋ฆฌ๋ทฐ</div>")
positive_output = gr.Textbox(label="๊ธ์ •๋ฆฌ๋ทฐ๋ฆฌ์ŠคํŠธ(20๊ฐœ)", lines=10)
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-subtitle'>โœจ ์ฃผ์š”๋ถ€์ •๋ฆฌ๋ทฐ</div>")
negative_output = gr.Textbox(label="๋ถ€์ •๋ฆฌ๋ทฐ๋ฆฌ์ŠคํŠธ(30๊ฐœ)", lines=10)
with gr.Row():
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-subtitle'>๐Ÿ“ข ๊ธ์ •๋ฆฌ๋ทฐ ๋ถ„์„</div>")
positive_analysis_output = gr.Textbox(label="๊ธ์ •๋ฆฌ๋ทฐ ๋ถ„์„", lines=8)
with gr.Column(elem_classes="custom-frame"):
gr.HTML("<div class='custom-subtitle'>๐Ÿ“ข ๋ถ€์ •๋ฆฌ๋ทฐ ๋ถ„์„</div>")
negative_analysis_output = gr.Textbox(label="๋ถ€์ •๋ฆฌ๋ทฐ ๋ถ„์„", lines=8)
# hidden state: ๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„ ๊ฒฐ๊ณผ ์—‘์…€ ํŒŒ์ผ ์ €์žฅ
partial_file_state = gr.State()
# [๋ฐ์ดํ„ฐ ์ž…๋ ฅ] - ๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ ์‹คํ–‰:
# ๋ฆฌ๋ทฐ์˜ต์…˜๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ์ €์žฅํ•˜๊ณ , ๋ฆฌ๋ทฐ๋ถ„์„ ํ”„๋ ˆ์ž„์„ ๋ณด์ด๊ฒŒ ํ•˜๋ฉฐ ์•„์ดํ…œ์˜ต์…˜ ๋ถ„์„ ๋“œ๋กญ๋‹ค์šด ์—…๋ฐ์ดํŠธ
def on_click_analyze_options(uploaded_file, selected_year):
partial_file, top20_list = analyze_options(uploaded_file, selected_year, "ChatGPT (gpt-4o-mini)")
return partial_file, gr.update(visible=True), gr.update(choices=top20_list, value="์ „์ฒด์˜ต์…˜๋ถ„์„")
analyze_button.click(
fn=on_click_analyze_options,
inputs=[file_input, year_radio],
outputs=[partial_file_state, review_analysis_frame, top20_dropdown]
)
# [๋ฆฌ๋ทฐ๋ถ„์„] - ๋ฆฌ๋ทฐ๋ถ„์„ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ ์‹คํ–‰
def on_click_analyze_reviews(partial_file, selected_option, llm_model):
final_file, pos_reviews, neg_reviews, pos_analysis, neg_analysis = analyze_reviews(
partial_file, selected_option, llm_model
)
return final_file, pos_reviews, neg_reviews, pos_analysis, neg_analysis
review_button.click(
fn=on_click_analyze_reviews,
inputs=[partial_file_state, top20_dropdown, llm_model_radio],
outputs=[download_final_output, positive_output, negative_output, positive_analysis_output, negative_analysis_output]
)
if __name__ == "__main__":
demo.launch()