# NOTE: "Spaces: Sleeping" page-status text from the hosting page was captured here; it is not part of the program.
import gradio as gr | |
import pandas as pd | |
import json | |
import plotly.express as px | |
import requests | |
import os | |
from textblob.download_corpora import download_all | |
from highlight_util import highlight_adjectives | |
from send_file import send_to_backend | |
# Download the data TextBlob needs (it only has to be fetched once).
# NOTE: this is a module-level call, so it executes on every start-up.
download_all()
def _ranking_csv_path(task_type, dataset, num_parts, method, perspective):
    """Return the path of the score CSV for one leaderboard configuration.

    Args:
        task_type: Task name, used as a directory under ``./dataset``.
        dataset: Dataset name, used as a directory under the task.
        num_parts: Number of subsets the dataset was split into.
        method: Partitioning code, ``"QS"`` or ``"EI"``.
        perspective: Label of the selected perspective (may be ``None``).

    Returns:
        The relative CSV path as a string.

    Raises:
        ValueError: If the task does not offer the selected perspective.
    """
    perspective = perspective or ""
    base_path = f"./dataset/{task_type}/{dataset}"
    split_dir = f"{base_path}/{num_parts}/{method}"

    if "Tokens" in perspective:
        stem = "token_counts"
    elif "Lines" in perspective:
        stem = "line_counts"
    else:
        stem = None

    if task_type == "Api Recommendation":
        # The UI labels spell the metric both "F1" and "f1", so the match is
        # done case-insensitively.
        lowered = perspective.lower()
        if "recall" in lowered:
            metric = "recall"
        elif "f1" in lowered:
            metric = "f1"
        else:
            metric = None
        if stem and metric:
            return f"{split_dir}/{metric}/{stem}_{method}.csv"
    elif stem:
        return f"{split_dir}/{stem}_{method}.csv"
    elif task_type not in ("Code Completion", "Test Generation"):
        if "Complexity" in perspective:
            return f"{split_dir}/CC_{method}.csv"
        if "Problem Types" in perspective:
            # Problem-type results do not depend on the split settings.
            return f"{base_path}/cata_result.csv"

    raise ValueError(
        f"Perspective {perspective!r} is not available for task {task_type!r}."
    )


def on_confirm(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Build the ranking table for the current selection.

    Reads the score CSV matching the selection and appends an ``Analysis``
    column holding the per-model analysis text passed through
    ``highlight_adjectives``.

    Args:
        task_type_radio: Selected task type.
        dataset_radio: Selected dataset.
        num_parts_dropdown: Selected number of subsets.
        perspective_radio: Selected perspective label.
        division_method_radio: Selected division method; anything other than
            "Equal Frequency Partitioning" is treated as "EI".

    Returns:
        A pandas DataFrame with the scores and the ``Analysis`` column.

    Raises:
        ValueError: If the task does not offer the selected perspective.
    """
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"

    # Resolve the path first so an unsupported selection fails with a clear
    # message before any file is touched.
    csv_path = _ranking_csv_path(
        task_type_radio, dataset_radio, num_parts_dropdown, method, perspective_radio
    )
    df = pd.read_csv(csv_path)

    analysis_result, _ = load_analysis_report(
        task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )
    # load_analysis_report returns an error string when the report cannot be
    # read; fall back to the placeholder text for every model in that case.
    reports = analysis_result if isinstance(analysis_result, dict) else {}

    # plot_visualization indexes the non-Code-Generation CSVs by "Models", so
    # accept either spelling of the model column here.
    model_column = "Model" if "Model" in df.columns else "Models"
    df["Analysis"] = df[model_column].map(
        lambda m: highlight_adjectives(reports.get(m, "No analysis provided."))
    )
    return df
# Generate the CSS used to colour the table columns
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
    """Return a CSS string that colours the ``#dataframe`` table columns.

    Each enabled category (line counts, token counts, cyclomatic complexity)
    claims one column per enabled tier (high / medium / low), starting at the
    second column, and takes the next colour from the palette. When
    ``problem_type`` is set, the following three columns get a fixed colour.
    """
    palette = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]

    def cell_rule(child, colour):
        return f"#dataframe td:nth-child({child}) {{ background-color: {colour}; }}\n"

    pieces = ["""
    #dataframe th {
        background-color: #f2f2f2
    }
    """]

    tiers_shown = sum(1 for flag in (show_high, show_medium, show_low) if flag)
    enabled = [flag for flag in (line_counts, token_counts, cyclomatic_complexity) if flag]

    next_child = 2  # the first column is left uncoloured
    for shade, _ in zip(palette, enabled):
        for _ in range(tiers_shown):
            pieces.append(cell_rule(next_child, shade))
            next_child += 1

    # The three Problem Type sub-columns share one fixed colour.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in range(3):
            pieces.append(cell_rule(next_child + offset, problem_type_color))

    # Hide the "data" marker.
    pieces.append("""
    .gradio-container .dataframe-container::before {
        content: none !important;
    }
    """)
    return "".join(pieces)
# AI analysis
def _load_json_or_error(path, what):
    """Load a JSON file, or return an ``[Error] ...`` string describing the failure.

    ``what`` names the artefact in the error message. Only I/O and decoding
    problems are converted; programming errors still propagate.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:  # ValueError covers JSON and Unicode decode errors
        return f"[Error] error load {what}: {e}"


def load_analysis_report(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the analysis report and the model recommendation for a selection.

    For "Code Generation" the files depend on the perspective (and, except for
    problem types, on the number of subsets and the division method). Every
    other task has a single ``report.json`` / ``recommendation.json`` pair per
    dataset.

    Args:
        task_type_radio: Selected task type.
        dataset_radio: Selected dataset.
        num_parts_dropdown: Selected number of subsets.
        perspective_radio: Selected perspective label.
        division_method_radio: Selected division method; anything other than
            "Equal Frequency Partitioning" is treated as "EI".

    Returns:
        A tuple ``(analysis_result, recommendation_result)``. Each element is
        the parsed JSON content, or an ``"[Error] ..."`` string when that file
        could not be read or parsed.
    """
    num_parts = num_parts_dropdown
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    dataset_dir = f"./llm_insight/{task_type_radio}/{dataset_radio}"

    if task_type_radio == "Code Generation":
        # Map the perspective label to the file-name prefix.
        if "Tokens" in perspective_radio:
            perspective = "token_counts"
        elif "Lines" in perspective_radio:
            perspective = "line_counts"
        elif "Complexity" in perspective_radio:
            perspective = "CC"
        else:
            perspective = "problem_type"

        if perspective == "problem_type":
            prefix = f"{dataset_dir}/{perspective}"
        else:
            prefix = f"{dataset_dir}/{num_parts}/{method}/{perspective}"
        report_file = f"{prefix}_report.json"
        recommendation_file = f"{prefix}_recommendation.json"
    else:
        report_file = f"{dataset_dir}/report.json"
        recommendation_file = f"{dataset_dir}/recommendation.json"

    analysis_result = _load_json_or_error(report_file, "analysis report")
    recommendation_result = _load_json_or_error(recommendation_file, "model recommendation")
    return (analysis_result, recommendation_result)
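# Visualization. The commented-out function below is the earlier implementation, which only read the QS CSV files.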
# def plot_visualization(task_type_radio,dataset_radio, perspective_radio, num_parts, plot_type): | |
# base_path = f"./dataset/{task_type_radio}/{dataset_radio}" | |
# if "Tokens" in perspective_radio: | |
# file_path = f'{base_path}/{num_parts}/QS/token_counts_QS.csv' | |
# elif "Lines" in perspective_radio: | |
# file_path = f'{base_path}/{num_parts}/QS/line_counts_QS.csv' | |
# elif "Complexity" in perspective_radio: | |
# file_path = f'{base_path}/{num_parts}/QS/CC_QS.csv' | |
# else: # Problem Types | |
# file_path = f'{base_path}/cata_result.csv' | |
# df = pd.read_csv(file_path) | |
# df.set_index('Model', inplace=True) | |
# df_transposed = df.T | |
# if plot_type == "Line Chart": | |
# fig = px.line(df_transposed, | |
# x=df_transposed.index, | |
# y=df_transposed.columns, | |
# title='Model Performance Across Different Subsets', | |
# labels={'value': 'Evaluation Score', 'index': 'Subsets'}, | |
# color_discrete_sequence=px.colors.qualitative.Plotly) | |
# fig.update_traces(hovertemplate='%{y}') | |
# elif plot_type == "Radar Chart": # Radar Chart | |
# # 重新组织数据为雷达图所需格式 | |
# radar_data = [] | |
# for model in df.index: | |
# for subset, score in df.loc[model].items(): | |
# radar_data.append({ | |
# 'Model': model, | |
# 'Subset': subset, | |
# 'Score': score | |
# }) | |
# radar_df = pd.DataFrame(radar_data) | |
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] | |
# # 创建雷达图 | |
# fig = px.line_polar(radar_df, | |
# r='Score', | |
# theta='Subset', | |
# color='Model', | |
# line_close=True, | |
# color_discrete_sequence=colors, | |
# title='Model Performance Radar Chart') | |
# # 自定义每个模型的线条样式 | |
# for i, trace in enumerate(fig.data): | |
# trace.update( | |
# fill=None, # 移除填充 | |
# line=dict( | |
# width=2, | |
# dash='solid' if i % 2 == 0 else 'dash', # 交替使用实线和虚线 | |
# ) | |
# ) | |
# # 优化雷达图的显示 | |
# fig.update_layout( | |
# polar=dict( | |
# radialaxis=dict( | |
# visible=True, | |
# range=[0, 100], | |
# showline=True, | |
# linewidth=1, | |
# gridcolor='lightgrey' | |
# ), | |
# angularaxis=dict( | |
# showline=True, | |
# linewidth=1, | |
# gridcolor='lightgrey' | |
# ) | |
# ), | |
# showlegend=True, | |
# legend=dict( | |
# yanchor="middle", # 垂直居中 | |
# y=0.5, | |
# xanchor="left", | |
# x=1.2, # 将图例移到雷达图右侧 | |
# bgcolor="rgba(255, 255, 255, 0.8)", # 半透明白色背景 | |
# bordercolor="lightgrey", # 添加边框 | |
# borderwidth=1 | |
# ), | |
# margin=dict(r=150), # 增加右侧边距,为图例留出空间 | |
# paper_bgcolor='white' | |
# ) | |
# else: # Heatmap | |
# # 创建热力图 | |
# fig = px.imshow(df_transposed, | |
# labels=dict(x="Model", y="Subset", color="Score"), | |
# color_continuous_scale="RdYlBu_r", # 使用科研风格配色:红-黄-蓝 | |
# aspect="auto", # 自动调整宽高比 | |
# title="Model Performance Heatmap") | |
# # 优化热力图显示 | |
# fig.update_layout( | |
# title=dict( | |
# text='Model Performance Distribution Across Subsets', | |
# x=0.5, | |
# y=0.95, | |
# xanchor='center', | |
# yanchor='top', | |
# font=dict(size=14) | |
# ), | |
# xaxis=dict( | |
# title="Model", | |
# tickangle=45, # 斜着显示模型名称 | |
# tickfont=dict(size=10), | |
# side="bottom" | |
# ), | |
# yaxis=dict( | |
# title="Subset", | |
# tickfont=dict(size=10) | |
# ), | |
# coloraxis=dict( | |
# colorbar=dict( | |
# title="Score", | |
# titleside="right", | |
# tickfont=dict(size=10), | |
# titlefont=dict(size=12), | |
# len=0.9, # 色条长度 | |
# ) | |
# ), | |
# margin=dict(t=80, r=100, b=80, l=80), # 调整边距 | |
# paper_bgcolor='white', | |
# plot_bgcolor='white' | |
# ) | |
# # 添加具体数值标注 | |
# annotations = [] | |
# for i in range(len(df_transposed.index)): | |
# for j in range(len(df_transposed.columns)): | |
# annotations.append( | |
# dict( | |
# x=j, | |
# y=i, | |
# text=f"{df_transposed.iloc[i, j]:.1f}", | |
# showarrow=False, | |
# font=dict(size=9, color='black') | |
# ) | |
# ) | |
# fig.update_layout(annotations=annotations) | |
# return fig | |
_PLOT_TYPES = ("Line Chart", "Radar Chart", "Heatmap")


def _scores_csv_path(task_type, dataset, num_parts, method, perspective):
    """Return the score CSV path for a selection, or raise ValueError if unsupported."""
    perspective = perspective or ""
    base_path = f"./dataset/{task_type}/{dataset}"
    split_dir = f"{base_path}/{num_parts}/{method}"

    if "Tokens" in perspective:
        stem = "token_counts"
    elif "Lines" in perspective:
        stem = "line_counts"
    else:
        stem = None

    if task_type == "Api Recommendation":
        # The UI labels spell the metric both "F1" and "f1"; match either.
        lowered = perspective.lower()
        if "recall" in lowered:
            metric = "recall"
        elif "f1" in lowered:
            metric = "f1"
        else:
            metric = None
        if stem and metric:
            return f"{split_dir}/{metric}/{stem}_{method}.csv"
    elif stem:
        return f"{split_dir}/{stem}_{method}.csv"
    elif task_type not in ("Code Completion", "Test Generation"):
        if "Complexity" in perspective:
            return f"{split_dir}/CC_{method}.csv"
        if "Problem Types" in perspective:
            return f"{base_path}/cata_result.csv"

    raise ValueError(
        f"Perspective {perspective!r} is not available for task {task_type!r}."
    )


def _line_chart(scores_by_subset):
    """Draw one line per model across the subsets (rows of ``scores_by_subset``)."""
    fig = px.line(
        scores_by_subset,
        x=scores_by_subset.index,
        y=scores_by_subset.columns,
        title='Model Performance Across Different Subsets',
        labels={'value': 'Evaluation Score', 'index': 'Subsets'},
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_traces(hovertemplate='%{y}')
    return fig


def _radar_chart(scores_by_model, model_column_name):
    """Draw a closed polar line per model over the subsets."""
    # Long format: one record per (model, subset) pair.
    radar_df = pd.DataFrame([
        {model_column_name: model, 'Subset': subset, 'Score': score}
        for model, row in scores_by_model.iterrows()
        for subset, score in row.items()
    ])
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    fig = px.line_polar(
        radar_df,
        r='Score',
        theta='Subset',
        color=model_column_name,
        line_close=True,
        color_discrete_sequence=colors,
        title='Model Performance Radar Chart',
    )
    # No fill; alternate solid and dashed lines so neighbouring models differ.
    for i, trace in enumerate(fig.data):
        trace.update(
            fill=None,
            line=dict(width=2, dash='solid' if i % 2 == 0 else 'dash'),
        )
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
            angularaxis=dict(
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
        ),
        showlegend=True,
        # Legend sits to the right of the chart; the right margin makes room.
        legend=dict(
            yanchor="middle",
            y=0.5,
            xanchor="left",
            x=1.2,
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="lightgrey",
            borderwidth=1,
        ),
        margin=dict(r=150),
        paper_bgcolor='white',
    )
    return fig


def _heatmap(scores_by_subset, model_column_name):
    """Draw a subset-by-model heatmap with each cell's value printed on it."""
    fig = px.imshow(
        scores_by_subset,
        labels=dict(x=model_column_name, y="Subset", color="Score"),
        color_continuous_scale="RdYlBu_r",
        aspect="auto",
        title="Model Performance Heatmap",
    )
    fig.update_layout(
        title=dict(
            text='Model Performance Distribution Across Subsets',
            x=0.5,
            y=0.95,
            xanchor='center',
            yanchor='top',
            font=dict(size=14),
        ),
        xaxis=dict(
            title=model_column_name,
            tickangle=45,
            tickfont=dict(size=10),
            side="bottom",
        ),
        yaxis=dict(
            title="Subset",
            tickfont=dict(size=10),
        ),
        coloraxis=dict(
            colorbar=dict(
                # Nested title dict replaces the deprecated
                # `titleside` / `titlefont` attributes.
                title=dict(text="Score", side="right", font=dict(size=12)),
                tickfont=dict(size=10),
                len=0.9,
            )
        ),
        margin=dict(t=80, r=100, b=80, l=80),
        paper_bgcolor='white',
        plot_bgcolor='white',
    )
    annotations = [
        dict(
            x=col_pos,
            y=row_pos,
            text=f"{value:.1f}",
            showarrow=False,
            font=dict(size=9, color='black'),
        )
        for row_pos, row in enumerate(scores_by_subset.to_numpy())
        for col_pos, value in enumerate(row)
    ]
    fig.update_layout(annotations=annotations)
    return fig


def plot_visualization(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio, plot_type):
    """Plot the scores of the current selection as a line chart, radar chart or heatmap.

    Args:
        task_type_radio: Selected task type.
        dataset_radio: Selected dataset.
        num_parts_dropdown: Selected number of subsets.
        perspective_radio: Selected perspective label.
        division_method_radio: Selected division method; anything other than
            "Equal Frequency Partitioning" is treated as "EI".
        plot_type: One of "Line Chart", "Radar Chart" or "Heatmap".

    Returns:
        The Plotly figure for the requested plot type.

    Raises:
        ValueError: If ``plot_type`` is unknown or the task does not offer the
            selected perspective.
    """
    if plot_type not in _PLOT_TYPES:
        raise ValueError(f"Unknown plot type {plot_type!r}; expected one of {_PLOT_TYPES}.")

    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    csv_path = _scores_csv_path(
        task_type_radio, dataset_radio, num_parts_dropdown, method, perspective_radio
    )
    df = pd.read_csv(csv_path)

    # The Code Generation CSVs name the model column "Model"; the others use "Models".
    model_column_name = 'Model' if task_type_radio == "Code Generation" else 'Models'
    df = df.set_index(model_column_name)
    df_transposed = df.T  # rows: subsets, columns: models

    if plot_type == "Line Chart":
        return _line_chart(df_transposed)
    if plot_type == "Radar Chart":
        return _radar_chart(df, model_column_name)
    return _heatmap(df_transposed, model_column_name)
# Sankey diagram of the recommended models
def plot_recommendation_sankey(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Draw the model recommendations as a four-level Sankey diagram.

    The levels are: root -> scenario -> model family -> specific model. The
    recommendation data is expected to look like
    ``{"scenario": [{"model": "reason"}, ...], ...}``.

    Args:
        task_type_radio: Selected task type.
        dataset_radio: Selected dataset.
        num_parts_dropdown: Selected number of subsets.
        perspective_radio: Selected perspective label.
        division_method_radio: Selected division method.

    Returns:
        A Plotly figure. When the recommendation file could not be loaded, an
        empty figure titled with the error message is returned instead of
        raising.
    """
    import plotly.graph_objects as go
    from plotly.colors import sample_colorscale

    _, recommendation_result = load_analysis_report(
        task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    # load_analysis_report returns an "[Error] ..." string on failure; show it
    # rather than failing on `.items()`.
    if not isinstance(recommendation_result, dict):
        placeholder = go.Figure()
        placeholder.update_layout(
            title_text=str(recommendation_result),
            font_size=11,
            height=700,
            margin=dict(t=80, l=20, r=20, b=20),
        )
        return placeholder

    color_scale = "RdYlBu_r"
    root_label = 'Model Recommendation'

    # Parallel per-node lists; a node's position in them is its Sankey index.
    node_labels, customdata, node_x = [], [], []
    node_indices = {}
    sources, targets, values = [], [], []

    def add_node(key, label, hover, x_pos):
        """Register the node once and return its index."""
        if key not in node_indices:
            node_indices[key] = len(node_labels)
            node_labels.append(label)
            customdata.append(hover)
            node_x.append(x_pos)
        return node_indices[key]

    def add_link(source, target, weight):
        sources.append(source)
        targets.append(target)
        values.append(weight)

    root_idx = add_node(root_label, root_label, "Root node", 0)

    for scenario, model_dicts in recommendation_result.items():
        # Label shows at most the first three words; the full text is in the hover.
        words = scenario.split()
        scenario_label = " ".join(words[:3]) + ("..." if len(words) > 3 else "")
        scenario_idx = add_node(f"scenario_{scenario}", scenario_label, scenario, 0.33)
        add_link(root_idx, scenario_idx, 10)

        for model_dict in model_dicts:
            for model, reason in model_dict.items():
                # Model family is the text before the first "-" or "_" (e.g. "GPT-4" -> "GPT").
                family = model.split('-')[0].split('_')[0]
                family_idx = add_node(
                    f"family_{family}", family, f"Model family: {family}", 0.66
                )
                add_link(scenario_idx, family_idx, 8)

                model_idx = add_node(
                    f"model_{model}", model, f"<b>{model}</b><br>{reason}", 1.0
                )
                add_link(family_idx, model_idx, 5)

    # Fixed colour for the root; the rest are sampled evenly from the scale.
    node_colors = ["#2c7bb6"]
    node_colors += sample_colorscale(
        color_scale,
        [n / (len(node_labels) - 1) for n in range(1, len(node_labels))],
    )

    fig = go.Figure(go.Sankey(
        arrangement="perpendicular",
        node=dict(
            pad=20,
            thickness=15,
            line=dict(color="rgba(0,0,0,0.3)", width=0.2),
            label=node_labels,
            color=node_colors,
            hovertemplate='%{label}<extra></extra>',
            # One x per node, in node-index order. Nodes are created
            # interleaved (scenario, family, model, scenario, ...), so the
            # level position is recorded when each node is added.
            x=node_x,
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color="rgba(180,180,180,0.4)",
            # A link's hover text is the description of its target node.
            customdata=[customdata[t] for t in targets],
            hovertemplate='%{customdata}<extra></extra>',
        ),
    ))
    fig.update_layout(
        title_text="<b>Model Recommendation Flow</b>",
        font_size=11,
        height=700,
        margin=dict(t=80, l=20, r=20, b=20),
    )
    return fig
### Gradio section ###
# Custom CSS passed to gr.Blocks(css=...). It must be plain CSS: wrapping it
# in <style> tags turns the first rule into an invalid selector.
custom_css = """
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f9f9f9;
}
.gr-label {
    font-size: 15px;
}
.gr-button-primary {
    background-color: #4CAF50;
    color: white;
    border-radius: 8px;
}
.gr-tabs > .tab-nav {
    background-color: #e0e0e0;
    border-bottom: 2px solid #ccc;
}
.gr-tabs > .tab-nav button.selected {
    background-color: #ffffff !important;
    border-bottom: 2px solid #4CAF50;
}
.gr-panel {
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    background-color: #fff;
}
.markdown-title {
    font-size: 1.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
.analysis-box {
    background-color: #f1f8ff;
    padding: 20px;
    border-left: 5px solid #4CAF50;
    border-radius: 6px;
    margin-top: 10px;
}
.recommendation-box {
    background-color: #fff3cd;
    padding: 20px;
    border-left: 5px solid #ff9800;
    border-radius: 6px;
    margin-top: 10px;
}
"""
# Upload endpoint; no active code in this file reads it.
SERVER_URL = "http://10.249.190.53:8000/upload"
# Dataset choices offered for each task type
def update_dataset(task):
    """Return a Gradio update listing the datasets available for ``task``.

    Returns ``None`` when the task is not one of the four known task types.
    """
    code_generation_sets = ["HumanEval", "MBPP"]
    complex_eval_tasks = ("Code Completion", "Api Recommendation", "Test Generation")

    if task == "Code Generation":
        options = code_generation_sets
    elif task in complex_eval_tasks:
        options = ["ComplexCodeEval-Python", "ComplexCodeEval-Java"]
    else:
        return None
    return gr.update(choices=options)
# Build the interface
with gr.Blocks(css=custom_css) as iface:
    gr.HTML("""
    <div style='text-align:center; padding:5px;'>
        <h1>Multi-view Code LLM Leaderboard</h1>
        <p>Multi-view Leaderboard: Towards Evaluating the Code Intelligence of LLMs From Multiple Views</p>
    </div>
    """)

    with gr.Row():
        # Configuration column
        with gr.Column(scale=1):
            task_type_radio = gr.Radio(
                ["Code Generation", "Code Completion", "Api Recommendation", "Test Generation"],
                label="Select Task Type",
                value="Code Generation",
            )
            # Initial choices match what update_dataset_options offers for the
            # default task ("Code Generation").
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP"],
                label="Select a dataset",
                value="HumanEval",
            )
            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3,
            )
            perspective_radio = gr.Radio(
                ["I - Num of Tokens in Problem Desc",
                 "II - Num of Lines in Problem Desc",
                 "III - Complexity of Reference Code",
                 "IV - Problem Types"],
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc",
            )
            # An explicit default keeps the visible selection in sync with the
            # method the callbacks use; with no selection they fall back to "EI".
            division_method_radio = gr.Radio(
                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                label="Choose the Division Method",
                value="Equal Frequency Partitioning",
                visible=True,
            )
            confirm_btn = gr.Button("Confirm", variant="primary")

        # Results column
        with gr.Column(scale=2):
            with gr.Tabs():
                # Ranking table
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(
                        headers=["Model", "Score", "Analysis"],
                        interactive=True,
                        # A single datatype string applies to every column; "html"
                        # lets the highlighted analysis text render as markup.
                        datatype="html",
                        render=True,
                    )
                # Charts
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart", "Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart",
                    )
                    chart = gr.Plot()
                # Model recommendations
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        recommendation_plot = gr.Plot()
                # NOTE: the "Upload inference result" tab is currently disabled.

    def update_dataset_options(task_type):
        """Offer the datasets of ``task_type`` and select the first one."""
        if task_type == "Code Generation":
            choices = ["HumanEval", "MBPP"]
        elif task_type in ("Code Completion", "Api Recommendation", "Test Generation"):
            choices = ["ComplexCodeEval-Python", "ComplexCodeEval-Java"]
        else:
            return gr.update()
        # Reset the value too, otherwise a dataset of the previous task stays selected.
        return gr.update(choices=choices, value=choices[0])

    def update_perspective_options(task, dataset):
        """Offer the perspectives valid for the task/dataset and select the first one."""
        if dataset == "MBPP":
            choices = [
                "I - Num of Tokens in Problem Desc",
                "III - Complexity of Reference Code",
                "IV - Problem Types",
            ]
        elif dataset == "HumanEval":
            choices = [
                "I - Num of Tokens in Problem Desc",
                "II - Num of Lines in Problem Desc",
                "III - Complexity of Reference Code",
                "IV - Problem Types",
            ]
        elif task == "Api Recommendation":
            choices = [
                "I - Num of Tokens in Problem Desc(Eval Metric:Recall)",
                "II - Num of Tokens in Problem Desc(Eval Metric:F1)",
                "III - Num of Lines in Problem Desc(Eval Metric:Recall)",
                "IV - Num of Lines in Problem Desc(Eval Metric:f1)",
            ]
        elif task in ("Code Completion", "Test Generation"):
            choices = [
                "I - Num of Tokens in Problem Desc(Eval Metric:ES)",
                "II - Num of Lines in Problem Desc(Eval Metric:ES)",
            ]
        else:
            return gr.update()
        # Reset the value so a label from the previous task cannot stay selected.
        return gr.update(choices=choices, value=choices[0])

    selection_inputs = [
        task_type_radio, dataset_radio, num_parts_slider,
        perspective_radio, division_method_radio,
    ]

    # A task change refreshes the datasets and then the perspectives. The
    # perspectives need their own step because the dataset value may not change
    # (e.g. Code Completion -> Api Recommendation), so dataset_radio.change
    # would not fire.
    task_type_radio.change(
        fn=update_dataset_options,
        inputs=task_type_radio,
        outputs=dataset_radio,
    ).then(
        fn=update_perspective_options,
        inputs=[task_type_radio, dataset_radio],
        outputs=perspective_radio,
    )
    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[task_type_radio, dataset_radio],
        outputs=perspective_radio,
    )

    # Confirm refreshes the table, then the chart, then the recommendation plot.
    confirm_btn.click(
        fn=on_confirm,
        inputs=selection_inputs,
        outputs=data_table,
    ).then(
        fn=plot_visualization,
        inputs=selection_inputs + [plot_type],
        outputs=chart,
    ).then(
        fn=plot_recommendation_sankey,
        inputs=selection_inputs,
        outputs=[recommendation_plot],
    )
    plot_type.change(
        fn=plot_visualization,
        inputs=selection_inputs + [plot_type],
        outputs=chart,
    )

# Start the app
iface.launch()