Spaces:
Sleeping
Sleeping
import base64 | |
import json | |
import uuid | |
import os | |
import asyncio | |
import aiohttp | |
import pandas as pd | |
import openpyxl | |
from openpyxl.chart import BarChart, Reference, PieChart | |
from openpyxl.chart.label import DataLabelList | |
from openpyxl.utils.dataframe import dataframe_to_rows | |
from datetime import datetime | |
import matplotlib.pyplot as plt | |
import gradio as gr | |
import tempfile | |
from dotenv import load_dotenv | |
import matplotlib | |
matplotlib.use('Agg') | |
# Load environment variables | |
load_dotenv() | |
# Set up OpenAI API key | |
openai_api_key = os.getenv("OPENAI_API_KEY") | |
# Read excel data for review analysis | |
def read_excel_data(file): | |
df = pd.read_excel(file, usecols="A, B, C, D, E", skiprows=1, | |
names=["ID", "Review Date", "Option", "Review", "ReviewScore"], engine='openpyxl') | |
df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date | |
df['Year-Month'] = df['Review Date'].astype(str).str.slice(0, 7) | |
df['Year'] = df['Review Date'].astype(str).str.slice(0, 4) | |
df['Month'] = df['Review Date'].astype(str).str.slice(5, 7) | |
df['Day'] = df['Review Date'].astype(str).str.slice(8, 10) | |
df['Option'] = df['Option'].astype(str) # Ensure Option column is treated as string | |
df['Option1'] = df['Option'].str.split(" / ").str[0] # 1์ฐจ ์ต์ ๋ง ์ถ์ถ | |
df['Review Length'] = df['Review'].str.len() # ์ถ๊ฐ๋ ๋ถ๋ถ: ๋ฆฌ๋ทฐ ๊ธธ์ด ๊ณ์ฐ | |
return df | |
# Analyze review data | |
def analyze_data(df): | |
monthly_data = df.groupby('Year-Month').size().reset_index(name='Counts') | |
yearly_data = df.groupby('Year').size().reset_index(name='Counts') | |
return monthly_data, yearly_data | |
def analyze_monthly_data_for_year(df, selected_year): | |
monthly_data_for_year = df[df['Year'] == selected_year].groupby('Month').size().reset_index(name='Counts') | |
all_months = pd.DataFrame([f"{m:02d}" for m in range(1, 13)], columns=['Month']) | |
monthly_trend_for_year = pd.merge(all_months, monthly_data_for_year, on='Month', how='left') | |
monthly_trend_for_year['Counts'] = monthly_trend_for_year['Counts'].fillna(0).astype(int) | |
return monthly_trend_for_year | |
def analyze_daily_data(df, selected_year): | |
start_date = datetime.strptime(f"{selected_year}-01-01", "%Y-%m-%d").date() | |
end_date = datetime.strptime(f"{selected_year}-12-31", "%Y-%m-%d").date() | |
date_range = pd.date_range(start=start_date, end=end_date).date | |
daily_data = df[df['Year'] == selected_year].groupby('Review Date').size().reset_index(name='Counts') | |
daily_data['Review Date'] = pd.to_datetime(daily_data['Review Date']).dt.date | |
all_dates_df = pd.DataFrame(date_range, columns=['Review Date']) | |
all_dates_df['Review Date'] = pd.to_datetime(all_dates_df['Review Date']).dt.date | |
merged_data = pd.merge(all_dates_df, daily_data, on='Review Date', how='left') | |
merged_data['Counts'] = merged_data['Counts'].fillna(0).astype(int) | |
return merged_data | |
def analyze_option_data(df): | |
data_counts = df['Option1'].value_counts().reset_index() | |
data_counts.columns = ['Option', 'Counts'] | |
total_counts = data_counts['Counts'].sum() | |
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2) | |
data_counts.sort_values(by='Counts', ascending=False, inplace=True) | |
return data_counts | |
def analyze_option_review_data(df): | |
df["Option1"] = df["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x) | |
df["Option2"] = df["Option"].apply( | |
lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "") | |
review_counts = df.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0) | |
review_counts["Total"] = review_counts.sum(axis=1) | |
option1_counts = df.groupby("Option1")["Option"].count() | |
option2_counts = df.groupby(["Option1", "Option2"])["Option"].count() | |
review_counts["์ต์ ๋ช (1์ฐจ)๊ฑด์"] = review_counts.index.get_level_values("Option1").map(option1_counts) | |
review_counts["์ต์ ๋ช (2์ฐจ)๊ฑด์"] = option2_counts | |
review_counts.sort_values(by=["์ต์ ๋ช (1์ฐจ)๊ฑด์", "์ต์ ๋ช (2์ฐจ)๊ฑด์"], ascending=[False, False], inplace=True) | |
return review_counts | |
def analyze_option_data_for_year(df, selected_year): | |
df_year = df[df['Year'] == selected_year] | |
data_counts = df_year['Option1'].value_counts().reset_index() | |
data_counts.columns = ['Option', 'Counts'] | |
total_counts = data_counts['Counts'].sum() | |
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2) | |
data_counts.sort_values(by='Counts', ascending=False, inplace=True) | |
return data_counts | |
def analyze_option_review_data_for_year(df, selected_year): | |
df_year = df[df['Year'] == selected_year].copy() | |
df_year.loc[:, "Option1"] = df_year["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x) | |
df_year.loc[:, "Option2"] = df_year["Option"].apply( | |
lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "") | |
review_counts = df_year.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0) | |
review_counts["Total"] = review_counts.sum(axis=1) | |
option1_counts = df_year.groupby("Option1")["Option"].count() | |
option2_counts = df_year.groupby(["Option1", "Option2"])["Option"].count() | |
review_counts["์ต์ ๋ช (1์ฐจ)๊ฑด์"] = review_counts.index.get_level_values("Option1").map(option1_counts) | |
review_counts["์ต์ ๋ช (2์ฐจ)๊ฑด์"] = option2_counts | |
review_counts.sort_values(by=["์ต์ ๋ช (1์ฐจ)๊ฑด์", "์ต์ ๋ช (2์ฐจ)๊ฑด์"], ascending=[False, False], inplace=True) | |
return review_counts | |
def extract_longest_reviews(df): | |
longest_reviews = df.groupby('ReviewScore').apply( | |
lambda x: x.nlargest(100, 'Review Length', keep='all')).reset_index(drop=True) | |
return longest_reviews.drop( | |
columns=['Review Length', 'Year-Month', 'Year', 'Month', 'Day', 'Option1', 'Option2']) # ์ญ์ ๋ ์ด๋ค | |
def save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts, | |
review_counts, selected_option_counts, selected_review_counts, longest_reviews): | |
wb = openpyxl.Workbook() | |
# ์๋ณธ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ ์ํธ ์ถ๊ฐ ๋ฐ ์ด๋ฆ ๋ณ๊ฒฝ | |
ws_original = wb.active | |
ws_original.title = "์๋ณธ๋ฆฌ๋ทฐ๋ฐ์ดํฐ" | |
for r in dataframe_to_rows(original_data, index=False, header=True): | |
ws_original.append(r) | |
ws_original.sheet_properties.tabColor = "000000" # ๊ฒ์์ | |
# ๋ฆฌ๋ทฐ๋ถ์ ์ถ์ด ์ํธ ์ถ๊ฐ | |
ws1 = wb.create_sheet(title="์ ์ฒด์๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") | |
for r in dataframe_to_rows(monthly_counts, index=False, header=True): | |
ws1.append(r) | |
chart1 = BarChart() | |
chart1.type = "col" | |
chart1.style = 10 | |
chart1.title = "Monthly Review Trends" | |
chart1.y_axis.title = 'Review Counts' | |
chart1.x_axis.title = 'Year-Month' | |
data1 = Reference(ws1, min_col=2, min_row=1, max_row=ws1.max_row, max_col=2) | |
cats1 = Reference(ws1, min_col=1, min_row=2, max_row=ws1.max_row) | |
chart1.add_data(data1, titles_from_data=True) | |
chart1.set_categories(cats1) | |
chart1.width = 30 | |
chart1.height = 15 | |
ws1.add_chart(chart1, "C2") | |
ws1.sheet_properties.tabColor = "FFA500" # ์ฃผํฉ์ | |
# ๋ ๋๋ณ ๋ฆฌ๋ทฐ ๋ถ์ ์ํธ ์ถ๊ฐ | |
ws2 = wb.create_sheet(title="๋ ๋๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") | |
for r in dataframe_to_rows(yearly_counts, index=False, header=True): | |
ws2.append(r) | |
chart2 = BarChart() | |
chart2.type = "col" | |
chart2.style = 10 | |
chart2.title = "Yearly Review Trends" | |
chart2.y_axis.title = 'Review Counts' | |
chart2.x_axis.title = 'Year' | |
data2 = Reference(ws2, min_col=2, min_row=1, max_row=ws2.max_row, max_col=2) | |
cats2 = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row) | |
chart2.add_data(data2, titles_from_data=True) | |
chart2.set_categories(cats2) | |
chart2.width = 30 | |
chart2.height = 15 | |
ws2.add_chart(chart2, "C2") | |
ws2.sheet_properties.tabColor = "FFA500" # ์ฃผํฉ์ | |
# ์๋ณ ๋ฆฌ๋ทฐ ๋ถ์ ์ํธ ์ถ๊ฐ | |
ws3 = wb.create_sheet(title="์ ํํ ๋ ๋ ์๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") | |
for r in dataframe_to_rows(monthly_trend, index=False, header=True): | |
ws3.append(r) | |
chart3 = BarChart() | |
chart3.type = "col" | |
chart3.style = 10 | |
chart3.title = "Monthly Trends for Selected Year" | |
chart3.y_axis.title = 'Review Counts' | |
chart3.x_axis.title = 'Month' | |
data3 = Reference(ws3, min_col=2, min_row=1, max_row=ws3.max_row, max_col=2) | |
cats3 = Reference(ws3, min_col=1, min_row=2, max_row=ws3.max_row) | |
chart3.add_data(data3, titles_from_data=True) | |
chart3.set_categories(cats3) | |
chart3.width = 30 | |
chart3.height = 15 | |
ws3.add_chart(chart3, "C2") | |
ws3.sheet_properties.tabColor = "FFA500" # ์ฃผํฉ์ | |
# ์ผ๋ณ ๋ฆฌ๋ทฐ ๋ถ์ ์ํธ ์ถ๊ฐ | |
ws4 = wb.create_sheet(title="์ ํํ ๋ ๋ ์ผ๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") | |
for r in dataframe_to_rows(daily_counts, index=False, header=True): | |
ws4.append(r) | |
chart4 = BarChart() | |
chart4.type = "col" | |
chart4.style = 10 | |
chart4.title = "Daily Trends for Selected Year" | |
chart4.y_axis.title = 'Review Counts' | |
chart4.x_axis.title = 'Date' | |
data4 = Reference(ws4, min_col=2, min_row=2, max_row=ws4.max_row + 1, max_col=2) | |
cats4 = Reference(ws4, min_col=1, min_row=2, max_row=ws4.max_row + 1) | |
chart4.add_data(data4, titles_from_data=True) | |
chart4.set_categories(cats4) | |
chart4.width = 50 | |
chart4.height = 15 | |
ws4.add_chart(chart4, "C2") | |
ws4.sheet_properties.tabColor = "FFA500" # ์ฃผํฉ์ | |
# ์ต์ ๋ถ์ ๊ฒฐ๊ณผ ์ํธ ์ถ๊ฐ | |
ws5 = wb.create_sheet(title="์ต์ ๋ถ์ ๊ฒฐ๊ณผ(์ต์ ๋ถ์)") | |
for r in dataframe_to_rows(option_counts, index=False, header=True): | |
ws5.append(r) | |
bar_chart = BarChart() | |
data = Reference(ws5, min_col=2, min_row=2, max_row=ws5.max_row, max_col=2) | |
cats = Reference(ws5, min_col=1, min_row=2, max_row=ws5.max_row, max_col=1) | |
bar_chart.add_data(data, titles_from_data=False) | |
bar_chart.set_categories(cats) | |
bar_chart.title = "Option Analysis (Counts)" | |
bar_chart.width = 40 | |
bar_chart.height = 20 | |
ws5.add_chart(bar_chart, "G2") | |
ws5.sheet_properties.tabColor = "0000FF" # ํ๋์ | |
# Create pie chart | |
top_10 = option_counts.head(10) | |
for idx, row in enumerate(top_10.itertuples(), 1): | |
ws5.cell(row=idx + 1, column=5, value=row.Option) | |
ws5.cell(row=idx + 1, column=6, value=row.Counts) | |
others_sum = option_counts['Counts'][10:].sum() | |
ws5.cell(row=12, column=5, value='Others') | |
ws5.cell(row=12, column=6, value=others_sum) | |
ws5.cell(row=1, column=5, value='Option') | |
ws5.cell(row=1, column=6, value='Counts') | |
pie_chart = PieChart() | |
data = Reference(ws5, min_col=6, min_row=2, max_row=12) | |
categories = Reference(ws5, min_col=5, min_row=2, max_row=12) | |
pie_chart.add_data(data, titles_from_data=False) | |
pie_chart.set_categories(categories) | |
pie_chart.title = "Top 10 Options (Share)" | |
pie_chart.dataLabels = DataLabelList() | |
pie_chart.dataLabels.showPercent = True | |
pie_chart.width = 30 | |
pie_chart.height = 20 | |
ws5.add_chart(pie_chart, "G40") | |
# ์ต์ ๋ณํ์ ๋ถ์ ์ํธ ์ถ๊ฐ | |
ws6 = wb.create_sheet(title="์ต์ ๋ณํ์ ๋ถ์(์ต์ ๋ถ์)") | |
ws6.append( | |
["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", | |
"Option2 Counts"]) | |
for r in dataframe_to_rows(review_counts, index=True, header=False): | |
ws6.append(r) | |
ws6.sheet_properties.tabColor = "0000FF" # ํ๋์ | |
# ์ ํํ ๋ ๋ ์ต์ ๋ถ์ ๊ฒฐ๊ณผ ์ํธ ์ถ๊ฐ | |
ws7 = wb.create_sheet(title="์ ํํ ๋ ๋ ์ต์ ๋ถ์ ๊ฒฐ๊ณผ(์ต์ ๋ถ์)") | |
for r in dataframe_to_rows(selected_option_counts, index=False, header=True): | |
ws7.append(r) | |
bar_chart_selected = BarChart() | |
data_selected = Reference(ws7, min_col=2, min_row=2, max_row=ws7.max_row, max_col=2) | |
cats_selected = Reference(ws7, min_col=1, min_row=2, max_row=ws7.max_row, max_col=1) | |
bar_chart_selected.add_data(data_selected, titles_from_data=False) | |
bar_chart_selected.set_categories(cats_selected) | |
bar_chart_selected.title = "Option Analysis for Selected Year (Counts)" | |
bar_chart_selected.width = 40 | |
bar_chart_selected.height = 20 | |
ws7.add_chart(bar_chart_selected, "G2") | |
ws7.sheet_properties.tabColor = "0000FF" # ํ๋์ | |
# Create pie chart for selected year | |
top_10_selected = selected_option_counts.head(10) | |
for idx, row in enumerate(top_10_selected.itertuples(), 1): | |
ws7.cell(row=idx + 1, column=5, value=row.Option) | |
ws7.cell(row=idx + 1, column=6, value=row.Counts) | |
others_sum_selected = selected_option_counts['Counts'][10:].sum() | |
ws7.cell(row=12, column=5, value='Others') | |
ws7.cell(row=12, column=6, value=others_sum_selected) | |
ws7.cell(row=1, column=5, value='Option') | |
ws7.cell(row=1, column=6, value='Counts') | |
pie_chart_selected = PieChart() | |
data_selected_pie = Reference(ws7, min_col=6, min_row=2, max_row=12) | |
categories_selected_pie = Reference(ws7, min_col=5, min_row=2, max_row=12) | |
pie_chart_selected.add_data(data_selected_pie, titles_from_data=False) | |
pie_chart_selected.set_categories(categories_selected_pie) | |
pie_chart_selected.title = "Top 10 Options for Selected Year (Share)" | |
pie_chart_selected.dataLabels = DataLabelList() | |
pie_chart_selected.dataLabels.showPercent = True | |
pie_chart_selected.width = 30 | |
pie_chart_selected.height = 20 | |
ws7.add_chart(pie_chart_selected, "G40") | |
# ์ ํํ ๋ ๋ ์ต์ ๋ณํ์ ๋ถ์ ์ํธ ์ถ๊ฐ | |
ws8 = wb.create_sheet(title="์ ํํ ๋ ๋ ์ต์ ๋ณํ์ ๋ถ์(์ต์ ๋ถ์)") | |
ws8.append( | |
["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", | |
"Option2 Counts"]) | |
for r in dataframe_to_rows(selected_review_counts, index=True, header=False): | |
ws8.append(r) | |
ws8.sheet_properties.tabColor = "0000FF" # ํ๋์ | |
# ๋ฆฌ๋ทฐ ๋ด์ฉ์ด ๊ธด ๋ฆฌ๋ทฐ ์ํธ ์ถ๊ฐ | |
ws9 = wb.create_sheet(title="๊ธด ๋ฆฌ๋ทฐ ๋ด์ฉ") | |
for r in dataframe_to_rows(longest_reviews, index=False, header=True): | |
ws9.append(r) | |
ws9.sheet_properties.tabColor = "00FF00" # ์ด๋ก์ | |
file_path = "๋ฆฌ๋ทฐ๋ถ์ ๋ค์ด๋ก๋.xlsx" | |
wb.save(file_path) | |
return file_path | |
def generate_plots(df, year): | |
# ์ต๊ทผ 3๋ ์ ๋ฐ์ดํฐ๋ง ์ฌ์ฉ | |
start_year = datetime.now().year - 2 | |
recent_data = df[df['Year'].astype(int) >= start_year] | |
monthly_counts, yearly_counts = analyze_data(df) # Use all data for yearly counts | |
recent_monthly_counts, _ = analyze_data(recent_data) # Use recent data for monthly counts | |
monthly_trend = analyze_monthly_data_for_year(recent_data, year) | |
daily_counts = analyze_daily_data(recent_data, year) | |
option_counts = analyze_option_data(recent_data) | |
plot_files = [] | |
# ์๋ณ ๋ฆฌ๋ทฐ ์ถ์ด ๊ทธ๋ํ ์์ฑ | |
fig1, ax1 = plt.subplots() | |
ax1.plot(recent_monthly_counts['Year-Month'], recent_monthly_counts['Counts'], marker='o') | |
ax1.set_title('Monthly Review Trends (Recent 3 Years)', fontsize=16) # ์ ๋ชฉ ํฐํธ ํฌ๊ธฐ ์ค์ | |
ax1.set_ylabel('Review Counts', fontsize=14) # y์ถ ๋ ์ด๋ธ ํฐํธ ํฌ๊ธฐ ์ค์ | |
# x์ถ ๋ ์ด๋ธ์ 90๋ ํ์ ํ์ฌ ํ์ํ๊ณ ํฐํธ ํฌ๊ธฐ ์ค์ | |
ax1.tick_params(axis='x', rotation=90, labelsize=6) | |
tmp_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") | |
fig1.savefig(tmp_file1.name) | |
plot_files.append(tmp_file1.name) | |
fig2, ax2 = plt.subplots() | |
ax2.bar(yearly_counts['Year'], yearly_counts['Counts']) | |
ax2.set_title('Yearly Review Trends') | |
ax2.set_xlabel('Year') | |
ax2.set_ylabel('Review Counts') | |
tmp_file2 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") | |
fig2.savefig(tmp_file2.name) | |
plot_files.append(tmp_file2.name) | |
fig3, ax3 = plt.subplots() | |
ax3.bar(monthly_trend['Month'], monthly_trend['Counts']) | |
ax3.set_title('Monthly Trends for Selected Year') | |
ax3.set_xlabel('Month') | |
ax3.set_ylabel('Review Counts') | |
tmp_file3 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") | |
fig3.savefig(tmp_file3.name) | |
plot_files.append(tmp_file3.name) | |
fig4, ax4 = plt.subplots() | |
ax4.bar(daily_counts['Review Date'], daily_counts['Counts']) | |
ax4.set_title('Daily Trends for Selected Year') | |
ax4.set_xlabel('Date') | |
ax4.set_ylabel('Review Counts') | |
tmp_file4 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") | |
fig4.savefig(tmp_file4.name) | |
plot_files.append(tmp_file4.name) | |
return plot_files | |
def process_file(file, year): | |
df = read_excel_data(file) | |
monthly_counts, yearly_counts = analyze_data(df) | |
monthly_trend = analyze_monthly_data_for_year(df, year) | |
daily_counts = analyze_daily_data(df, year) | |
option_counts = analyze_option_data(df) | |
review_counts = analyze_option_review_data(df) | |
selected_option_counts = analyze_option_data_for_year(df, year) | |
selected_review_counts = analyze_option_review_data_for_year(df, year) | |
longest_reviews = extract_longest_reviews(df) | |
original_data = pd.read_excel(file, sheet_name=0, engine='openpyxl') # ์ฒซ ๋ฒ์งธ ์ํธ๋ง ๋ก๋ | |
result_file = save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, | |
option_counts, review_counts, selected_option_counts, selected_review_counts, | |
longest_reviews) | |
return result_file | |
# ํ์ผ์ ์ ์ฅํ๊ณ ๋ค์ด๋ก๋ URL์ ์์ฑํ๋ ํจ์ | |
def generate_download_links(plots): | |
download_links = [] | |
for i, plot in enumerate(plots): | |
if os.path.exists(plot): | |
with open(plot, "rb") as image_file: | |
encoded_string = base64.b64encode(image_file.read()).decode() | |
data_url = f"image/png;base64,{encoded_string}" | |
download_links.append(data_url) | |
return json.dumps(download_links) | |
def process_file_with_plots(file, year): | |
df = read_excel_data(file) | |
result_file = process_file(file, year) | |
plots = generate_plots(df, year) | |
return [result_file] + plots | |
def process_file_with_plots2(file, year): | |
df = read_excel_data(file) | |
result_file = process_file(file, year) | |
plots = generate_plots(df, year) | |
download_links = generate_download_links(plots) | |
return_values = [result_file] + download_links | |
return return_values | |
years = [str(year) for year in range(datetime.now().year, datetime.now().year - 10, -1)] | |
def predict(file, year): | |
return process_file_with_plots(file, year) | |
def predict_api(file, year): | |
return process_file_with_plots2(file, year) | |
# ๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐํํ๋ ํจ์ | |
def get_positive_reviews(df, years, option_analysis): | |
df = df[df['Year'].isin(years)] | |
if option_analysis != "์ ์ฒด์ต์ ๋ถ์": | |
top_n = int(option_analysis.split("(")[1].split("๊ฐ")[0]) | |
top_options = df['Option1'].value_counts().head(top_n).index.tolist() | |
df = df[df['Option1'].isin(top_options)] | |
positive_reviews = df[(df['ReviewScore'] == 5) & (df['Review Length'] <= 500)].sort_values(by='Review Length', | |
ascending=False) | |
if len(positive_reviews) < 20: | |
additional_reviews = df[(df['ReviewScore'] == 4) & (df['Review Length'] <= 500)].sort_values(by='Review Length', | |
ascending=False) | |
positive_reviews = pd.concat([positive_reviews, additional_reviews]) | |
positive_reviews = positive_reviews.head(20) | |
positive_reviews.reset_index(drop=True, inplace=True) | |
positive_reviews.index += 1 | |
positive_reviews['์๋ฒ'] = positive_reviews.index | |
return "\n\n".join(positive_reviews.apply( | |
lambda x: f"{x['์๋ฒ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1)) | |
# ๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐํํ๋ ํจ์ | |
def get_negative_reviews(df, years, option_analysis): | |
df = df[df['Year'].isin(years)] | |
if option_analysis != "์ ์ฒด์ต์ ๋ถ์": | |
top_n = int(option_analysis.split("(")[1].split("๊ฐ")[0]) | |
top_options = df['Option1'].value_counts().head(top_n).index.tolist() | |
df = df[df['Option1'].isin(top_options)] | |
negative_reviews = df[(df['ReviewScore'] == 1) & (df['Review Length'] <= 500)].sort_values(by='Review Length', | |
ascending=False) | |
if len(negative_reviews) < 30: | |
additional_reviews = df[(df['ReviewScore'] == 2) & (df['Review Length'] <= 500)].sort_values(by='Review Length', | |
ascending=False) | |
negative_reviews = pd.concat([negative_reviews, additional_reviews]) | |
negative_reviews = negative_reviews.head(30) | |
negative_reviews.reset_index(drop=True, inplace=True) | |
negative_reviews.index += 1 | |
negative_reviews['์๋ฒ'] = negative_reviews.index | |
return "\n\n".join(negative_reviews.apply( | |
lambda x: f"{x['์๋ฒ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1)) | |
# ๋ฆฌ๋ทฐ ์ ๋ฐ์ดํธ ๋ฐ ๋ถ์ ํ๋กฌํํธ ์์ฑ ํจ์ | |
def update_reviews(file, years, option_analysis): | |
df = read_excel_data(file) | |
positive_reviews = get_positive_reviews(df, years, option_analysis) | |
negative_reviews = get_negative_reviews(df, years, option_analysis) | |
positive_prompt = f"{positive_reviews}\n\n{prompts['๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์']}" | |
negative_prompt = f"{negative_reviews}\n\n{prompts['๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์']}" | |
return positive_reviews, negative_reviews, positive_prompt, negative_prompt | |
async def analyze_all_async(positive_prompt, negative_prompt): | |
async with aiohttp.ClientSession() as session: | |
positive_task = generate_section_async( | |
session, | |
review_output=positive_prompt, | |
system_message=prompts["๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์"], | |
max_tokens=10000, | |
temperature=0.3, | |
top_p=0.95, | |
) | |
negative_task = generate_section_async( | |
session, | |
review_output=negative_prompt, | |
system_message=prompts["๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์"], | |
max_tokens=10000, | |
temperature=0.4, | |
top_p=0.95, | |
) | |
positive_analysis, negative_analysis = await asyncio.gather(positive_task, negative_task) | |
return positive_analysis[0], negative_analysis[0] | |
def analyze_all(positive_prompt, negative_prompt): | |
return asyncio.run(analyze_all_async(positive_prompt, negative_prompt)) | |
# Function to generate analysis for each review type | |
async def generate_section_async(session, review_output, system_message, max_tokens, temperature, top_p): | |
prompt = f"{review_output}\n\n{system_message}" | |
response = await call_openai_api_async(session, prompt, system_message, max_tokens, temperature, top_p) | |
return response, prompt | |
# Function to call the API | |
async def call_openai_api_async(session, content, system_message, max_tokens, temperature, top_p): | |
async with session.post( | |
"https://api.openai.com/v1/chat/completions", | |
headers={"Authorization": f"Bearer {openai_api_key}"}, | |
json={ | |
"model": "gpt-4o-mini", | |
"messages": [ | |
{"role": "system", "content": system_message}, | |
{"role": "user", "content": content}, | |
], | |
"max_tokens": max_tokens, | |
"temperature": temperature, | |
"top_p": top_p, | |
} | |
) as response: | |
result = await response.json() | |
return result['choices'][0]['message']['content'] | |
prompts = { | |
"๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์": """[์ค์ ๊ท์น] | |
1. ๋ฐ๋์ ํ๊ธ(ํ๊ตญ์ด)๋ก ์ถ๋ ฅํ๋ผ. | |
2. ๋๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ถ์ํ๋ ๋น ๋ฐ์ดํฐ ๋ถ์๊ฐ์ด๋ค. | |
3. ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๊ธ์ ์ ์ธ ์๊ฒฌ์ ๋ฐ์ดํฐ๋ง ๋ถ์ํ๋ผ. | |
4. ๋ฐ๋์ ์ ๊ณต๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์๋ง ๋ถ์ํ๋ผ. | |
5. ๋์ ์๊ฐ์ ํฌํจํ์ง ๋ง ๊ฒ. | |
[๋ถ์ ์กฐ๊ฑด] | |
1. ์ด 20๊ฐ์ ๋ฆฌ๋ทฐ๋ฐ์ดํฐ๋ฅผ ์ ๊ณตํ๋ค. | |
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์ ๋์งธ์ค ๋ถํฐ์ ์ค์ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ์ํ๋ผ. | |
3. ๋ฐ๋์ ๊ธ์ ์ ์ธ ์๊ฒฌ๋ง์ ๋ถ์ํ๋ผ. ๋ถ์ ์ ์ธ ์๊ฒฌ์ ์ ์ธํ๋ผ. | |
4. ๊ธฐ๋ฅ๊ณผ ์ฑ๋ฅ์ ๋ถ๋ถ, ๊ฐ์ฑ์ ์ธ ๋ถ๋ถ, ์ค์ ์ฌ์ฉ ์ธก๋ฉด์ ๋ถ๋ถ, ๋ฐฐ์ก์ ๋ถ๋ถ, ํ๊ฒ๋ณ ๋ถ๋ถ์ ์น์ ์ผ๋ก ๊ตฌ๋ถํ์ฌ ๋ถ์ํ๋ผ. | |
5. 4๋ฒ์ ์กฐ๊ฑด์ ํฌํจ๋์ง ์๋ ๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ๋ ์น์ ์ผ๋ก ์ถ๋ ฅํ๋ผ. | |
6. ๋ง์ผํ ์ ์ธ ์์๋ก ์ฌ์ฉํ ์ ์๋ ๊ณ ๊ฐ์ ์ค์ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ์ํ๋ผ. | |
[์ถ๋ ฅ ํํ ์กฐ๊ฑด] | |
1. ๊ฐ๊ฐ์ ์ ๋ชฉ ์์ '๐'์ด๋ชจ์ง๋ฅผ ์ถ๋ ฅํ๋ผ,'#', '##'์ ์ถ๋ ฅํ์ง ๋ง๊ฒ. | |
2. ๊ฐ์ฅ ๋ง์ง๋ง์ ์ข ํฉ ์๊ฒฌ์ ์์ฑํ๋ผ, "๐์ข ํฉ์๊ฒฌ"์ ์ ๋ชฉํํ๋ฅผ ์ฌ์ฉํ๋ผ. | |
[์ข ํฉ์๊ฒฌ์ ์ถ๋ ฅ ์กฐ๊ฑด ์์] | |
('์ข ํฉ์๊ฒฌ'์ด ์๋ ๋ค๋ฅธ ๋ถ๋ถ์ ์ด ์ถ๋ ฅ ์กฐ๊ฑด์ ๋ฐ์ํ์ง ๋ง ๊ฒ. | |
- ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๋ผ. | |
- ์ข ํฉ์๊ฒฌ์๋ ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๊ณ ์์ ์ ๋ฌธ์ฅ์ผ๋ก ์์ฑํ๋ผ. | |
- ๋งค์ถ์ ๊ทน๋ํ ํ ์ ์๋ ๊ณ ๊ฐ์ ์ค์ ๋ฆฌ๋ทฐ ํฌ์ธํธ๋ฅผ ์ ์ํ๋ผ. | |
[SWOT๋ถ์ ์กฐ๊ฑด] | |
1. '์ข ํฉ์๊ฒฌ' ๋ค์ ๋ด์ฉ์ผ๋ก SWOT๋ถ์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. | |
2. SWOT๋ถ์ ์ค '๊ฐ์ '์๊ฒฌ๊ณผ '๊ธฐํ'์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. | |
3. ๋ฐ๋์ '์ข ํฉ์๊ฒฌ'์ ๋ด์ฉ์ ๊ธฐ๋ฐ์ผ๋ก ์์ฑํ๋ผ. | |
4. ์ ๋ชฉ์ '๐น ๊ฐ์ ', '๐น ๊ธฐํ'์ผ๋ก ์ถ๋ ฅํ๋ผ. | |
[์ข ํฉ์๊ฒฌ์ ์ถ๋ ฅ ์กฐ๊ฑด ๋] | |
3. ์ค์ ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์ ์ฌ์ฉ๋ ๋จ์ด๋ฅผ ํฌํจํ๋ผ. | |
4. ๋์ ์๊ฐ์ ์์๋ก ๋ฃ์ง ๋ง ๊ฒ. | |
""", | |
"๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์": """[์ค์ ๊ท์น] | |
1. ๋ฐ๋์ ํ๊ธ(ํ๊ตญ์ด)๋ก ์ถ๋ ฅํ๋ผ. | |
2. ๋๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ถ์ํ๋ ๋น ๋ฐ์ดํฐ ๋ถ์๊ฐ์ด๋ค. | |
3. ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๋ถ์ ์ ์ธ ์๊ฒฌ์ ๋ฐ์ดํฐ๋ง ๋ถ์ํ๋ผ. | |
4. ๋ฐ๋์ ์ ๊ณต๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์๋ง ๋ถ์ํ๋ผ. | |
5. ๋์ ์๊ฐ์ ํฌํจํ์ง ๋ง ๊ฒ. | |
[๋ถ์ ์กฐ๊ฑด] | |
1. ์ด 30๊ฐ์ ๋ฆฌ๋ทฐ๋ฐ์ดํฐ๋ฅผ ์ ๊ณตํ๋ค. | |
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์ ๋์งธ์ค ๋ถํฐ์ ์ค์ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ์ํ๋ผ. | |
3. ๋ถ์ ์ ์ธ ์๊ฒฌ๋ง์ ๋ถ์ํ๋ผ. | |
4. ๊ธฐ๋ฅ๊ณผ ์ฑ๋ฅ์ ๋ถ๋ถ, ๊ฐ์ฑ์ ์ธ ๋ถ๋ถ, ์ค์ ์ฌ์ฉ ์ธก๋ฉด์ ๋ถ๋ถ, ๋ฐฐ์ก์ ๋ถ๋ถ, ๊ณ ๊ฐ์ ๋ถ๋ ธ ๋ถ๋ถ์ ์น์ ์ผ๋ก ๊ตฌ๋ถํ์ฌ ๋ถ์ํ๋ผ. | |
5. 4๋ฒ์ ์กฐ๊ฑด์ ํฌํจ๋์ง ์๋ ๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ๋ ์น์ ์ผ๋ก ์ถ๋ ฅํ๋ผ. | |
6. ๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ์ผ๋ก '๊ฐ์ ํ ์ '์ ์ถ๋ ฅํ๋ผ. | |
[์ถ๋ ฅ ํํ ์กฐ๊ฑด] | |
1. ๊ฐ๊ฐ์ ์ ๋ชฉ ์์ '๐'์ด๋ชจ์ง๋ฅผ ์ถ๋ ฅํ๋ผ,'#', '##'์ ์ถ๋ ฅํ์ง ๋ง๊ฒ. | |
2. ๊ฐ์ฅ ๋ง์ง๋ง์ '๊ฐ์ ํ ์ '์ ์ถ๋ ฅํ๋ผ("๐ข๊ฐ์ ํ ์ "์ ์ ๋ชฉํํ๋ฅผ ์ฌ์ฉํ๋ผ.) | |
[๊ฐ์ ํ ์ ์ ์ถ๋ ฅ ์กฐ๊ฑด ์์] | |
('๊ฐ์ ํ ์ '์ด ์๋ ๋ค๋ฅธ ๋ถ๋ถ์ ์ด ์ถ๋ ฅ ์กฐ๊ฑด์ ๋ฐ์ํ์ง ๋ง ๊ฒ. | |
- ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๋ผ. | |
- ์ฃผ์ ํญ๋ชฉ๋ณ๋ก ๊ฐ์ ํ ์ ์ ์ถ๋ ฅํ๋ผ. | |
- ์ ๋ฌธ์ ์ด๊ณ , ๋ถ์์ ์ด๋ฉฐ, ์ ์ํ๋ ํํ์ ๊ณต์ํ ์ดํฌ๋ฅผ ์ฌ์ฉํ๋ผ.(๋จ๋ตํ ํํ ๊ธ์ง) | |
[SWOT๋ถ์ ์กฐ๊ฑด] | |
1. '์ข ํฉ์๊ฒฌ' ๋ค์ ๋ด์ฉ์ผ๋ก SWOT๋ถ์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. | |
2. SWOT๋ถ์ ์ค '์ฝ์ '์๊ฒฌ๊ณผ '์ํ'์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. | |
3. ๋ฐ๋์ '๊ฐ์ ํ ์ '์ ๋ด์ฉ์ ๊ธฐ๋ฐ์ผ๋ก ์์ฑํ๋ผ. | |
4. ์ ๋ชฉ์ '๐ ์ฝ์ ', '๐ ์ํ'์ผ๋ก ์ถ๋ ฅํ๋ผ. | |
[๊ฐ์ ํ ์ ์ ์ถ๋ ฅ ์กฐ๊ฑด ๋] | |
3. ์ค์ ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์ ์ฌ์ฉ๋ ๋จ์ด๋ฅผ ํฌํจํ๋ผ. | |
4. ๋์ ์๊ฐ์ ์์๋ก ๋ฃ์ง ๋ง ๊ฒ. | |
""" | |
} | |
def select_all_years(): | |
current_year = datetime.now().year | |
return [str(year) for year in range(current_year, current_year - 5, -1)] | |
def deselect_all_years(): | |
return [] | |
with gr.Blocks() as ๋ฆฌ๋ทฐ์ถ์ด_๋ถ์: | |
gr.Markdown("### ์์ ํ์ผ ์ ๋ก๋") | |
file_input = gr.File(label="", file_types=["xlsx"]) | |
year_selection = gr.Radio(years, label="๋ถ์๋ ๋ ์ ํ", value=str(datetime.now().year)) | |
analyze_button = gr.Button("๋ถ์ ์คํ") | |
outputs = [ | |
gr.File(label="์ธ๋ถ๋ถ์ ์๋ฃ๋ฅผ ๋ค์ด๋ฐ์ผ์ธ์(Excelํ์ผ)"), | |
gr.Image(label="์ต๊ทผ3๋ ๊ฐ ์๋ณ ๋ฆฌ๋ทฐ์ถ์ด"), | |
gr.Image(label="์ต๊ทผ ๋ ๋๋ณ ๋ฆฌ๋ทฐ์ถ์ด"), | |
gr.Image(label="์ ํ๋ ๋ ์ ๋ฆฌ๋ทฐ์ถ์ด"), | |
gr.Image(label="์ ํ๋ ๋ ์ผ์ผ ๋ฆฌ๋ทฐ์ถ์ด"), | |
] | |
analyze_button.click(predict, inputs=[file_input, year_selection], outputs=outputs) | |
with gr.Blocks() as ๋ฆฌ๋ทฐ๋ถ์: | |
year_selection_review = gr.CheckboxGroup( | |
choices=[str(year) for year in select_all_years()], | |
label="์ฐ๋ ์ ํ", | |
value=[str(year) for year in select_all_years()] | |
) | |
option_selection = gr.Radio( | |
choices=["์ ์ฒด์ต์ ๋ถ์", "์ฃผ์์ต์ ๋ถ์(1๊ฐ)", "์ฃผ์์ต์ ๋ถ์(3๊ฐ)", "์ฃผ์์ต์ ๋ถ์(5๊ฐ)"], | |
label="์ต์ ๋ณ ๋ฆฌ๋ทฐ๋ถ์ ์ ํ", | |
value="์ ์ฒด์ต์ ๋ถ์" | |
) | |
analyze_button_review = gr.Button("๋ฆฌ๋ทฐ ๊ฐ์ ธ์ค๊ธฐ") | |
analyze_all_button = gr.Button("๋ฆฌ๋ทฐ ๋ถ์ํ๊ธฐ") | |
with gr.Column(): | |
gr.Markdown("### ๋ฆฌ๋ทฐ ๊ฒฐ๊ณผ") | |
positive_reviews_output_review = gr.Textbox(label="๊ธ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ(20๊ฐ)", interactive=False, lines=12) | |
negative_reviews_output_review = gr.Textbox(label="๋ถ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ(30๊ฐ)", interactive=False, lines=12) | |
gr.Markdown("### ์ถ๋ ฅ") | |
positive_analysis_output_review = gr.Textbox(label="๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์", interactive=False, lines=12) | |
negative_analysis_output_review = gr.Textbox(label="๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์", interactive=False, lines=12) | |
analyze_button_review.click(update_reviews, inputs=[file_input, year_selection_review, option_selection], | |
outputs=[positive_reviews_output_review, negative_reviews_output_review]) | |
analyze_all_button.click( | |
fn=analyze_all, | |
inputs=[positive_reviews_output_review, negative_reviews_output_review], | |
outputs=[positive_analysis_output_review, negative_analysis_output_review] | |
) | |
with gr.Row(): | |
with gr.Column(): | |
positive_analysis_output_review | |
with gr.Column(): | |
negative_analysis_output_review | |
with gr.Blocks() as tabs: | |
with gr.Tab("๋ฆฌ๋ทฐ์ถ์ด ๋ถ์"): | |
๋ฆฌ๋ทฐ์ถ์ด_๋ถ์.render() | |
with gr.Tab("๋ฆฌ๋ทฐ๋ถ์"): | |
๋ฆฌ๋ทฐ๋ถ์.render() | |
if __name__ == "__main__": | |
tabs.launch() |