# lmy0802's picture
# Update app.py
# 661d369 verified
import gradio as gr
import pandas as pd
import json
import plotly.express as px
import requests
import os
from textblob.download_corpora import download_all
from highlight_util import highlight_adjectives
from send_file import send_to_backend
# Download the corpora TextBlob needs (only has to run once per environment).
download_all()
def on_confirm(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the ranking table for the chosen task/dataset/perspective.

    Reads the matching CSV from ./dataset/... and appends an "Analysis"
    column containing the per-model AI analysis (rendered to HTML by
    highlight_adjectives).

    Returns:
        pandas.DataFrame with the model scores plus the "Analysis" column.
    """
    num_parts = num_parts_dropdown
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    base_path = f"./dataset/{task_type_radio}/{dataset_radio}"
    analysis_result, _ = load_analysis_report(
        task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    # The UI labels spell the metric inconsistently ("F1" vs "f1"), so match
    # case-insensitively instead of string-matching each exact label.
    perspective = perspective_radio.lower()
    if task_type_radio == "Api Recommendation":
        metric = "recall" if "recall" in perspective else "f1"
        counts = "token_counts" if "tokens" in perspective else "line_counts"
        csv_path = f"{base_path}/{num_parts}/{method}/{metric}/{counts}_{method}.csv"
    elif task_type_radio in ("Code Completion", "Test Generation"):
        counts = "token_counts" if "tokens" in perspective else "line_counts"
        csv_path = f"{base_path}/{num_parts}/{method}/{counts}_{method}.csv"
    else:  # Code Generation
        if "tokens" in perspective:
            csv_path = f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv"
        elif "lines" in perspective:
            csv_path = f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv"
        elif "complexity" in perspective:
            csv_path = f"{base_path}/{num_parts}/{method}/CC_{method}.csv"
        else:  # Problem Types
            csv_path = f"{base_path}/cata_result.csv"
    df = pd.read_csv(csv_path)

    # The model column is "Model" in the Code Generation CSVs and "Models"
    # elsewhere (see plot_visualization); the original always used "Model",
    # which raises KeyError for the other tasks.
    model_col = "Model" if "Model" in df.columns else "Models"
    df["Analysis"] = df[model_col].map(
        lambda m: highlight_adjectives(analysis_result.get(m, "No analysis provided."))
    )
    return df
# 生成 CSS 样式
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
    """Build the per-column background CSS for the ranking table.

    Each enabled perspective (line counts, token counts, cyclomatic
    complexity) claims one table column per enabled level
    (high / medium / low), tinted in that perspective's colour.  When the
    problem-type perspective is enabled it always occupies the next three
    columns with a fixed colour.
    """
    css = """
#dataframe th {
background-color: #f2f2f2
}
"""
    palette = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    col = 1  # column 1 is the model name; tinting starts at nth-child(2)

    for cat_idx, enabled in enumerate((line_counts, token_counts, cyclomatic_complexity)):
        if not enabled:
            continue
        for level_shown in (show_high, show_medium, show_low):
            if level_shown:
                css += f"#dataframe td:nth-child({col + 1}) {{ background-color: {palette[cat_idx]}; }}\n"
                col += 1

    # The three problem-type sub-columns share one fixed colour.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in (1, 2, 3):
            css += f"#dataframe td:nth-child({col + offset}) {{ background-color: {problem_type_color}; }}\n"

    # Hide the "data" marker Gradio renders above the table.
    css += """
.gradio-container .dataframe-container::before {
content: none !important;
}
"""
    return css
# AI分析
def load_analysis_report(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the AI analysis report and the model recommendation JSON files.

    For "Code Generation" the files are organised per perspective (and, for
    every perspective except problem types, also per subset count and
    division method).  All other tasks keep a single report/recommendation
    pair per dataset.

    Returns:
        (analysis_result, recommendation_result): each is the parsed JSON
        (normally a dict) on success, or an "[Error] ..." string when the
        file could not be read.  Callers must tolerate either type.
    """
    num_parts = num_parts_dropdown
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    base_path = f"./llm_insight/{task_type_radio}"

    if task_type_radio == "Code Generation":
        # Map the UI perspective label onto the file-name prefix.
        if "Tokens" in perspective_radio:
            perspective = "token_counts"
        elif "Lines" in perspective_radio:
            perspective = "line_counts"
        elif "Complexity" in perspective_radio:
            perspective = "CC"
        else:
            perspective = "problem_type"
        if perspective == "problem_type":
            # Problem-type reports are not split into subsets.
            report_file = f"{base_path}/{dataset_radio}/{perspective}_report.json"
            recommendation_file = f"{base_path}/{dataset_radio}/{perspective}_recommendation.json"
        else:
            report_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_report.json"
            recommendation_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_recommendation.json"
    else:
        report_file = f"{base_path}/{dataset_radio}/report.json"
        recommendation_file = f"{base_path}/{dataset_radio}/recommendation.json"

    def _read_json(path, what):
        # Best-effort read: the UI renders the error string instead of crashing.
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            return f"[Error] error load {what}: {e}"

    return (_read_json(report_file, "analysis report"),
            _read_json(recommendation_file, "model recommendation"))
# 可视化
# def plot_visualization(task_type_radio,dataset_radio, perspective_radio, num_parts, plot_type):
# base_path = f"./dataset/{task_type_radio}/{dataset_radio}"
# if "Tokens" in perspective_radio:
# file_path = f'{base_path}/{num_parts}/QS/token_counts_QS.csv'
# elif "Lines" in perspective_radio:
# file_path = f'{base_path}/{num_parts}/QS/line_counts_QS.csv'
# elif "Complexity" in perspective_radio:
# file_path = f'{base_path}/{num_parts}/QS/CC_QS.csv'
# else: # Problem Types
# file_path = f'{base_path}/cata_result.csv'
# df = pd.read_csv(file_path)
# df.set_index('Model', inplace=True)
# df_transposed = df.T
# if plot_type == "Line Chart":
# fig = px.line(df_transposed,
# x=df_transposed.index,
# y=df_transposed.columns,
# title='Model Performance Across Different Subsets',
# labels={'value': 'Evaluation Score', 'index': 'Subsets'},
# color_discrete_sequence=px.colors.qualitative.Plotly)
# fig.update_traces(hovertemplate='%{y}')
# elif plot_type == "Radar Chart": # Radar Chart
# # 重新组织数据为雷达图所需格式
# radar_data = []
# for model in df.index:
# for subset, score in df.loc[model].items():
# radar_data.append({
# 'Model': model,
# 'Subset': subset,
# 'Score': score
# })
# radar_df = pd.DataFrame(radar_data)
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# # 创建雷达图
# fig = px.line_polar(radar_df,
# r='Score',
# theta='Subset',
# color='Model',
# line_close=True,
# color_discrete_sequence=colors,
# title='Model Performance Radar Chart')
# # 自定义每个模型的线条样式
# for i, trace in enumerate(fig.data):
# trace.update(
# fill=None, # 移除填充
# line=dict(
# width=2,
# dash='solid' if i % 2 == 0 else 'dash', # 交替使用实线和虚线
# )
# )
# # 优化雷达图的显示
# fig.update_layout(
# polar=dict(
# radialaxis=dict(
# visible=True,
# range=[0, 100],
# showline=True,
# linewidth=1,
# gridcolor='lightgrey'
# ),
# angularaxis=dict(
# showline=True,
# linewidth=1,
# gridcolor='lightgrey'
# )
# ),
# showlegend=True,
# legend=dict(
# yanchor="middle", # 垂直居中
# y=0.5,
# xanchor="left",
# x=1.2, # 将图例移到雷达图右侧
# bgcolor="rgba(255, 255, 255, 0.8)", # 半透明白色背景
# bordercolor="lightgrey", # 添加边框
# borderwidth=1
# ),
# margin=dict(r=150), # 增加右侧边距,为图例留出空间
# paper_bgcolor='white'
# )
# else: # Heatmap
# # 创建热力图
# fig = px.imshow(df_transposed,
# labels=dict(x="Model", y="Subset", color="Score"),
# color_continuous_scale="RdYlBu_r", # 使用科研风格配色:红-黄-蓝
# aspect="auto", # 自动调整宽高比
# title="Model Performance Heatmap")
# # 优化热力图显示
# fig.update_layout(
# title=dict(
# text='Model Performance Distribution Across Subsets',
# x=0.5,
# y=0.95,
# xanchor='center',
# yanchor='top',
# font=dict(size=14)
# ),
# xaxis=dict(
# title="Model",
# tickangle=45, # 斜着显示模型名称
# tickfont=dict(size=10),
# side="bottom"
# ),
# yaxis=dict(
# title="Subset",
# tickfont=dict(size=10)
# ),
# coloraxis=dict(
# colorbar=dict(
# title="Score",
# titleside="right",
# tickfont=dict(size=10),
# titlefont=dict(size=12),
# len=0.9, # 色条长度
# )
# ),
# margin=dict(t=80, r=100, b=80, l=80), # 调整边距
# paper_bgcolor='white',
# plot_bgcolor='white'
# )
# # 添加具体数值标注
# annotations = []
# for i in range(len(df_transposed.index)):
# for j in range(len(df_transposed.columns)):
# annotations.append(
# dict(
# x=j,
# y=i,
# text=f"{df_transposed.iloc[i, j]:.1f}",
# showarrow=False,
# font=dict(size=9, color='black')
# )
# )
# fig.update_layout(annotations=annotations)
# return fig
def plot_visualization(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio, plot_type):
    """Render the selected chart for the current task/dataset/perspective.

    Loads the same CSV as on_confirm and draws one of "Line Chart",
    "Radar Chart" or "Heatmap".

    Changes vs. the original: removed debug prints, removed an unused
    load_analysis_report call and an unused df_melted, and removed a
    verbatim duplicate of the whole Radar Chart section.

    Returns:
        plotly Figure.
    """
    num_parts = num_parts_dropdown
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    base_path = f"./dataset/{task_type_radio}/{dataset_radio}"

    # The UI labels spell the metric inconsistently ("F1" vs "f1"), so match
    # case-insensitively.
    perspective = perspective_radio.lower()
    if task_type_radio == "Api Recommendation":
        metric = "recall" if "recall" in perspective else "f1"
        counts = "token_counts" if "tokens" in perspective else "line_counts"
        csv_path = f"{base_path}/{num_parts}/{method}/{metric}/{counts}_{method}.csv"
    elif task_type_radio in ("Code Completion", "Test Generation"):
        counts = "token_counts" if "tokens" in perspective else "line_counts"
        csv_path = f"{base_path}/{num_parts}/{method}/{counts}_{method}.csv"
    else:  # Code Generation
        if "tokens" in perspective:
            csv_path = f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv"
        elif "lines" in perspective:
            csv_path = f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv"
        elif "complexity" in perspective:
            csv_path = f"{base_path}/{num_parts}/{method}/CC_{method}.csv"
        else:  # Problem Types
            csv_path = f"{base_path}/cata_result.csv"
    df = pd.read_csv(csv_path)

    # The model column is named "Model" in the Code Generation CSVs and
    # "Models" everywhere else.
    model_column_name = 'Model' if task_type_radio == "Code Generation" else 'Models'
    df.set_index(model_column_name, inplace=True)
    df_transposed = df.T

    if plot_type == "Line Chart":
        fig = px.line(df_transposed,
                      x=df_transposed.index,
                      y=df_transposed.columns,
                      title='Model Performance Across Different Subsets',
                      labels={'value': 'Evaluation Score', 'index': 'Subsets'},
                      color_discrete_sequence=px.colors.qualitative.Plotly)
        fig.update_traces(hovertemplate='%{y}')
    elif plot_type == "Radar Chart":
        # Reorganize data into long form for the radar chart.
        radar_rows = [
            {model_column_name: model, 'Subset': subset, 'Score': score}
            for model in df.index
            for subset, score in df.loc[model].items()
        ]
        radar_df = pd.DataFrame(radar_rows)
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                  '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
        fig = px.line_polar(radar_df,
                            r='Score',
                            theta='Subset',
                            color=model_column_name,
                            line_close=True,
                            color_discrete_sequence=colors,
                            title='Model Performance Radar Chart')
        # Customize line styles: alternate solid and dashed lines, no fill.
        for i, trace in enumerate(fig.data):
            trace.update(
                fill=None,
                line=dict(
                    width=2,
                    dash='solid' if i % 2 == 0 else 'dash',
                )
            )
        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 100],
                    showline=True,
                    linewidth=1,
                    gridcolor='lightgrey'
                ),
                angularaxis=dict(
                    showline=True,
                    linewidth=1,
                    gridcolor='lightgrey'
                )
            ),
            showlegend=True,
            legend=dict(
                yanchor="middle",
                y=0.5,
                xanchor="left",
                x=1.2,  # legend sits to the right of the radar
                bgcolor="rgba(255, 255, 255, 0.8)",
                bordercolor="lightgrey",
                borderwidth=1
            ),
            margin=dict(r=150),  # room for the legend
            paper_bgcolor='white'
        )
    else:  # Heatmap
        fig = px.imshow(df_transposed,
                        labels=dict(x=model_column_name, y="Subset", color="Score"),
                        color_continuous_scale="RdYlBu_r",
                        aspect="auto",
                        title="Model Performance Heatmap")
        fig.update_layout(
            title=dict(
                text='Model Performance Distribution Across Subsets',
                x=0.5,
                y=0.95,
                xanchor='center',
                yanchor='top',
                font=dict(size=14)
            ),
            xaxis=dict(
                title=model_column_name,
                tickangle=45,  # slanted model names
                tickfont=dict(size=10),
                side="bottom"
            ),
            yaxis=dict(
                title="Subset",
                tickfont=dict(size=10)
            ),
            coloraxis=dict(
                colorbar=dict(
                    title="Score",
                    titleside="right",
                    tickfont=dict(size=10),
                    titlefont=dict(size=12),
                    len=0.9,
                )
            ),
            margin=dict(t=80, r=100, b=80, l=80),
            paper_bgcolor='white',
            plot_bgcolor='white'
        )
        # Annotate every cell with its value.
        annotations = []
        for i in range(len(df_transposed.index)):
            for j in range(len(df_transposed.columns)):
                annotations.append(
                    dict(
                        x=j,
                        y=i,
                        text=f"{df_transposed.iloc[i, j]:.1f}",
                        showarrow=False,
                        font=dict(size=9, color='black')
                    )
                )
        fig.update_layout(annotations=annotations)
    return fig
# 桑基图展示推荐模型
# Sankey diagram of the recommended models
def plot_recommendation_sankey(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Draw a Sankey diagram Root -> Scenario -> Model family -> Model.

    Consumes the recommendation JSON loaded by load_analysis_report, whose
    expected shape is {"scenario": [{model: reason}, ...], ...}.  NOTE: when
    the JSON could not be read, load_analysis_report returns a string and
    .items() below will raise — same as the original behaviour.
    """
    import plotly.graph_objects as go
    from plotly.colors import sample_colorscale

    _, recommendation_result = load_analysis_report(
        task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    levels = ['Model Recommendation', 'Scenario', 'Model Family', 'Specific Model']
    color_scale = "RdYlBu_r"

    node_labels = [levels[0]]  # root node
    customdata = ["Root node"]
    # BUGFIX: record each node's x coordinate as the node is created.  The
    # original built the x array by grouping counts per category, but nodes
    # are appended interleaved (scenario, family, model, scenario, ...), so
    # positions landed on the wrong nodes.
    node_x = [0.0]
    sources, targets, values = [], [], []
    node_indices = {levels[0]: 0}
    current_idx = 1

    for scenario, model_dicts in recommendation_result.items():
        # Shorten long scenario names to the first three words for the label.
        scenario_label = " ".join(scenario.split()[:3]) + ("..." if len(scenario.split()) > 3 else "")
        node_labels.append(scenario_label)
        customdata.append(scenario)
        node_x.append(0.33)
        node_indices[f"scenario_{scenario}"] = current_idx
        current_idx += 1
        # Root -> scenario link.
        sources.append(0)
        targets.append(node_indices[f"scenario_{scenario}"])
        values.append(10)

        for model_dict in model_dicts:
            for model, reason in model_dict.items():
                # Model family, e.g. "GPT-4" -> "GPT".
                family = model.split('-')[0].split('_')[0]
                if f"family_{family}" not in node_indices:
                    node_labels.append(family)
                    customdata.append(f"Model family: {family}")
                    node_x.append(0.66)
                    node_indices[f"family_{family}"] = current_idx
                    current_idx += 1
                # Scenario -> family link.
                sources.append(node_indices[f"scenario_{scenario}"])
                targets.append(node_indices[f"family_{family}"])
                values.append(8)

                if f"model_{model}" not in node_indices:
                    node_labels.append(model)
                    customdata.append(f"<b>{model}</b><br>{reason}")
                    node_x.append(1.0)
                    node_indices[f"model_{model}"] = current_idx
                    current_idx += 1
                # Family -> model link.
                sources.append(node_indices[f"family_{family}"])
                targets.append(node_indices[f"model_{model}"])
                values.append(5)

    # Colours, one per node; guard the division when only the root exists.
    node_colors = ["#2c7bb6"]
    if len(node_labels) > 1:
        node_colors += sample_colorscale(
            color_scale, [n / (len(node_labels) - 1) for n in range(1, len(node_labels))]
        )

    fig = go.Figure(go.Sankey(
        arrangement="perpendicular",
        node=dict(
            pad=20,
            thickness=15,
            line=dict(color="rgba(0,0,0,0.3)", width=0.2),
            label=node_labels,
            color=node_colors,
            hovertemplate='%{label}<extra></extra>',
            x=node_x,
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color="rgba(180,180,180,0.4)",
            customdata=[customdata[t] for t in targets],
            hovertemplate='%{customdata}<extra></extra>'
        )
    ))
    fig.update_layout(
        title_text="<b>Model Recommendation Flow</b>",
        font_size=11,
        height=700,
        margin=dict(t=80, l=20, r=20, b=20)
    )
    return fig
### Gradio UI section ###
# Custom stylesheet passed to gr.Blocks(css=...).  Gradio injects this string
# into a <style> element itself, so the former "<style>"/"</style>" wrapper
# tags were removed — leaving them in nests style tags and the rules never
# apply.
custom_css = """
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f9f9f9;
}
.gr-label {
    font-size: 15px;
}
.gr-button-primary {
    background-color: #4CAF50;
    color: white;
    border-radius: 8px;
}
.gr-tabs > .tab-nav {
    background-color: #e0e0e0;
    border-bottom: 2px solid #ccc;
}
.gr-tabs > .tab-nav button.selected {
    background-color: #ffffff !important;
    border-bottom: 2px solid #4CAF50;
}
.gr-panel {
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    background-color: #fff;
}
.markdown-title {
    font-size: 1.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
.analysis-box {
    background-color: #f1f8ff;
    padding: 20px;
    border-left: 5px solid #4CAF50;
    border-radius: 6px;
    margin-top: 10px;
}
.recommendation-box {
    background-color: #fff3cd;
    padding: 20px;
    border-left: 5px solid #ff9800;
    border-radius: 6px;
    margin-top: 10px;
}
"""
# Upload endpoint used with send_to_backend; not referenced in the visible
# code (the result-upload tab is commented out).
SERVER_URL = "http://10.249.190.53:8000/upload"
# Build the interface
def update_dataset(task):
    """Return a gr.update(...) swapping the dataset choices for *task*.

    Only "Code Generation" keeps its own benchmark pair; every other task
    evaluates on the ComplexCodeEval splits.  Mirrors
    update_perspective_options/update_dataset_options used by the live UI.
    """
    if task == "Code Generation":
        return gr.update(choices=["HumanEval", "MBPP"])
    # "Code Completion", "Api Recommendation" and "Test Generation" all share
    # the same dataset list (the original repeated this branch three times).
    return gr.update(choices=["ComplexCodeEval-Python", "ComplexCodeEval-Java"])
with gr.Blocks(css=custom_css) as iface:
    gr.HTML("""
    <div style='text-align:center; padding:5px;'>
    <h1>Multi-view Code LLM Leaderboard</h1>
    <p>Multi-view Leaderboard: Towards Evaluating the Code Intelligence of LLMs From Multiple Views</p>
    </div>
    """)
    with gr.Row():
        # Configuration panel.
        with gr.Column(scale=1):
            task_type_radio = gr.Radio(
                ["Code Generation", "Code Completion", "Api Recommendation", "Test Generation"],
                label="Select Task Type",
                value="Code Generation"
            )
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP", 'ComplexCodeEval'],
                label="Select a dataset",
                value="HumanEval"
            )
            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3
            )
            # A single radio replaces the former group of checkboxes.
            perspective_radio = gr.Radio(
                ["I - Num of Tokens in Problem Desc",
                 "II - Num of Lines in Problem Desc",
                 "III - Complexity of Reference Code",
                 "IV - Problem Types"],
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc"
            )
            # Shared division-method radio.
            division_method_radio = gr.Radio(
                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                label="Choose the Division Method",
                visible=True
            )
            confirm_btn = gr.Button("Confirm", variant="primary")
        # Main display area.
        with gr.Column(scale=2):
            with gr.Tabs():
                # Ranking table.
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(
                        headers=["Model", "Score", "Analysis"],
                        interactive=True,
                        datatype="html",  # third column holds HTML
                        render=True,      # enable HTML rendering
                    )
                # Visualization.
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart", "Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart"
                    )
                    chart = gr.Plot()
                # AI analysis / model recommendation.
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        recommendation_plot = gr.Plot()
                # NOTE: a disabled "Upload inference result" tab (file upload,
                # task/dataset pickers, send_to_backend wiring against
                # SERVER_URL) was removed here as dead commented-out code; see
                # version control history to restore it.

    # Swap the dataset choices when the task type changes.
    def update_dataset_options(task_type):
        """Dataset choices valid for the selected task type."""
        if task_type == "Code Generation":
            return gr.update(choices=["HumanEval", "MBPP"])
        # The other three tasks share the same list (the original repeated
        # this branch verbatim three times).
        return gr.update(choices=["ComplexCodeEval-Python", "ComplexCodeEval-Java"])

    # Swap the perspective choices when the dataset changes.
    def update_perspective_options(task, dataset):
        """Perspective choices valid for the current task/dataset pair."""
        if dataset == "MBPP":
            return gr.update(choices=[
                "I - Num of Tokens in Problem Desc",
                "III - Complexity of Reference Code",
                "IV - Problem Types"
            ])
        if dataset == "HumanEval":
            return gr.update(choices=[
                "I - Num of Tokens in Problem Desc",
                "II - Num of Lines in Problem Desc",
                "III - Complexity of Reference Code",
                "IV - Problem Types"
            ])
        if task == "Api Recommendation":
            return gr.update(choices=[
                "I - Num of Tokens in Problem Desc(Eval Metric:Recall)",
                "II - Num of Tokens in Problem Desc(Eval Metric:F1)",
                "III - Num of Lines in Problem Desc(Eval Metric:Recall)",
                "IV - Num of Lines in Problem Desc(Eval Metric:f1)"
            ])
        # BUGFIX: the original condition was
        # `task == "Code Completion" or "Test Generation"`, which is always
        # truthy; use a proper membership test.
        if task in ("Code Completion", "Test Generation"):
            return gr.update(choices=[
                "I - Num of Tokens in Problem Desc(Eval Metric:ES)",
                "II - Num of Lines in Problem Desc(Eval Metric:ES)"
            ])

    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[task_type_radio, dataset_radio],
        outputs=perspective_radio
    )

    # Bind events: refresh the table, then the report state, then both plots.
    confirm_btn.click(
        fn=on_confirm,
        inputs=[task_type_radio, dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=data_table
    ).then(
        fn=load_analysis_report,
        inputs=[task_type_radio, dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=[gr.State()]
    ).then(
        fn=plot_visualization,
        inputs=[task_type_radio, dataset_radio, num_parts_slider, perspective_radio, division_method_radio, plot_type],
        outputs=chart
    ).then(
        fn=plot_recommendation_sankey,
        inputs=[task_type_radio, dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=[recommendation_plot]  # note: a list output
    )

    # Redraw the chart when the plot type changes.
    plot_type.change(
        fn=plot_visualization,
        inputs=[task_type_radio, dataset_radio, num_parts_slider, perspective_radio, division_method_radio, plot_type],
        outputs=chart
    )

    # Switch the dataset list when the task type changes.
    task_type_radio.change(
        fn=update_dataset_options,
        inputs=task_type_radio,
        outputs=dataset_radio
    )

# Launch the interface.
iface.launch()