| import gradio as gr | |
| import pandas as pd | |
| import json | |
| import plotly.express as px | |
| import requests | |
| import os | |
| from textblob.download_corpora import download_all | |
| from highlight_util import highlight_adjectives | |
| from send_file import send_to_backend | |
| # Download the corpora required by TextBlob (only needs to run once) | |
| download_all() | |
| def on_confirm(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio): | |
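| # Read the per-subset results CSV for the selected task, dataset, perspective and division method, then attach an AI analysis column. | |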
| num_parts = num_parts_dropdown | |
| method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI" | |
| base_path = f"./dataset/{task_type_radio}/{dataset_radio}" | |
| analysis_result,_ = load_analysis_report(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio) | |
| # Choose which results file to read based on the selected perspective | |
| if task_type_radio=="Api Recommendation": | |
| if "Tokens" in perspective_radio and "Recall" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/recall/token_counts_{method}.csv") | |
| elif "Tokens" in perspective_radio and "F1" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/f1/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio and "Recall" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/recall/line_counts_{method}.csv") | |
| elif "Lines" in perspective_radio and "f1" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/f1/line_counts_{method}.csv") | |
| elif task_type_radio=="Code Completion": | |
| if "Tokens" in perspective_radio : | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv") | |
| elif task_type_radio=="Test Generation": | |
| if "Tokens" in perspective_radio : | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv") | |
| else: | |
| if "Tokens" in perspective_radio : | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv") | |
| elif "Complexity" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/CC_{method}.csv") | |
| elif "Problem Types" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/cata_result.csv") | |
| # Load the analysis report | |
| # AI analysis column | |
| # df["Analysis"] = df["Model"].map(lambda m: analysis_result.get(m, "No analysis provided.")) | |
| df["Analysis"] = df["Model"].map( | |
| lambda m: highlight_adjectives(analysis_result.get(m, "No analysis provided.")) | |
| ) | |
| return df | |
| # Generate CSS styles | |
| def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low): | |
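| # Build CSS that colors the table columns belonging to each enabled perspective (line counts, token counts, cyclomatic complexity, problem types). | |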
| css = """ | |
| #dataframe th { | |
| background-color: #f2f2f2 | |
| } | |
| """ | |
| colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"] | |
| categories = [line_counts, token_counts, cyclomatic_complexity] | |
| category_index = 0 | |
| column_index = 1 | |
| for category in categories: | |
| if category: | |
| if show_high: | |
| css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n" | |
| column_index += 1 | |
| if show_medium: | |
| css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n" | |
| column_index += 1 | |
| if show_low: | |
| css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n" | |
| column_index += 1 | |
| category_index += 1 | |
| # Set a fixed color for the three Problem Type sub-columns | |
| if problem_type: | |
| problem_type_color = "#d4f0fc" # any color you like works here | |
| css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {problem_type_color}; }}\n" | |
| css += f"#dataframe td:nth-child({column_index + 2}) {{ background-color: {problem_type_color}; }}\n" | |
| css += f"#dataframe td:nth-child({column_index + 3}) {{ background-color: {problem_type_color}; }}\n" | |
| # 隐藏 "data" 标识 | |
| css += """ | |
| .gradio-container .dataframe-container::before { | |
| content: none !important; | |
| } | |
| """ | |
| return css | |
| # AI analysis | |
| def load_analysis_report(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio): | |
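| # Return (analysis_result, recommendation_result) loaded from the precomputed JSON reports under ./llm_insight. | |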
| num_parts = num_parts_dropdown | |
| method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI" | |
| base_path = f"./llm_insight/{task_type_radio}" | |
| if task_type_radio=="Code Generation": | |
| # Determine the file path based on the perspective | |
| if "Tokens" in perspective_radio: | |
| perspective = "token_counts" | |
| elif "Lines" in perspective_radio: | |
| perspective = "line_counts" | |
| elif "Complexity" in perspective_radio: | |
| perspective = "CC" | |
| else: | |
| perspective = "problem_type" | |
| if perspective == "problem_type": | |
| report_file = f"{base_path}/{dataset_radio}/{perspective}_report.json" | |
| recommendation_file = f"{base_path}/{dataset_radio}/{perspective}_recommendation.json" | |
| else: | |
| report_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_report.json" | |
| recommendation_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_recommendation.json" | |
| else: | |
| report_file = f"{base_path}/{dataset_radio}/report.json" | |
| recommendation_file = f"{base_path}/{dataset_radio}/recommendation.json" | |
| try: | |
| with open(report_file, 'r', encoding='utf-8') as f: | |
| analysis_result = json.load(f) | |
| except Exception as e: | |
| analysis_result = f"[Error] error load analysis report: {e}" | |
| try: | |
| with open(recommendation_file, 'r', encoding='utf-8') as f: | |
| recommendation_result = json.load(f) | |
| except Exception as e: | |
| recommendation_result = f"[Error] error load model recommendation: {e}" | |
| return (analysis_result,recommendation_result) | |
| # Visualization | |
| def plot_visualization(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio,plot_type): | |
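| # Load the same per-subset results as on_confirm and render them as a line chart, radar chart, or heatmap. | |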
| num_parts = num_parts_dropdown | |
| method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI" | |
| base_path = f"./dataset/{task_type_radio}/{dataset_radio}" | |
| analysis_result,_ = load_analysis_report(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio) | |
| # Choose which results file to read based on the selected perspective | |
| if task_type_radio=="Api Recommendation": | |
| if "Tokens" in perspective_radio and "Recall" in perspective_radio: | |
| print(f"{base_path}/{num_parts}/{method}/recall/token_counts_{method}.csv") | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/recall/token_counts_{method}.csv") | |
| print(df) | |
| elif "Tokens" in perspective_radio and "F1" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/f1/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio and "Recall" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/recall/line_counts_{method}.csv") | |
| elif "Lines" in perspective_radio and "f1" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/f1/line_counts_{method}.csv") | |
| elif task_type_radio=="Code Completion": | |
| if "Tokens" in perspective_radio : | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv") | |
| elif task_type_radio=="Test Generation": | |
| if "Tokens" in perspective_radio : | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv") | |
| elif "Lines" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv") | |
| else: | |
| if "Tokens" in perspective_radio : | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv") | |
| print(df) | |
| elif "Lines" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv") | |
| elif "Complexity" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/{num_parts}/{method}/CC_{method}.csv") | |
| elif "Problem Types" in perspective_radio: | |
| df = pd.read_csv(f"{base_path}/cata_result.csv") | |
| if task_type_radio == "Code Generation": | |
| df.set_index('Model', inplace=True) | |
| df_transposed = df.T | |
| model_column_name = 'Model' # Store the column name for later use | |
| else: | |
| df.set_index('Models', inplace=True) | |
| df_transposed = df.T | |
| model_column_name = 'Models' # Store the column name for later use | |
| if plot_type == "Line Chart" and task_type_radio=="Api Recommendation": | |
| df_melted = df_transposed.reset_index().melt( | |
| id_vars="index", # 保留subset列(原列名) | |
| var_name=model_column_name, # 模型列名 | |
| value_name="Score" # 分数列 | |
| ) | |
| fig = px.line(df_transposed, | |
| x=df_transposed.index, | |
| y=df_transposed.columns, | |
| title='Model Performance Across Different Subsets', | |
| labels={'value': 'Evaluation Score', 'index': 'Subsets'}, | |
| color_discrete_sequence=px.colors.qualitative.Plotly | |
| ) | |
| fig.update_traces(hovertemplate='%{y}') | |
| if plot_type == "Line Chart" and task_type_radio!="Api Recommendation": | |
| fig = px.line(df_transposed, | |
| x=df_transposed.index, | |
| y=df_transposed.columns, | |
| title='Model Performance Across Different Subsets', | |
| labels={'value': 'Evaluation Score', 'index': 'Subsets'}, | |
| color_discrete_sequence=px.colors.qualitative.Plotly | |
| ) | |
| fig.update_traces(hovertemplate='%{y}') | |
| if plot_type == "Radar Chart": | |
| # Reorganize data for radar chart | |
| radar_data = [] | |
| for model in df.index: | |
| for subset, score in df.loc[model].items(): | |
| radar_data.append({ | |
| model_column_name: model, # Use the stored column name | |
| 'Subset': subset, | |
| 'Score': score | |
| }) | |
| radar_df = pd.DataFrame(radar_data) | |
| colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', | |
| '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] | |
| # Create radar chart | |
| fig = px.line_polar(radar_df, | |
| r='Score', | |
| theta='Subset', | |
| color=model_column_name, # Use the stored column name | |
| line_close=True, | |
| color_discrete_sequence=colors, | |
| title='Model Performance Radar Chart') | |
| # Customize line styles for each model | |
| for i, trace in enumerate(fig.data): | |
| trace.update( | |
| fill=None, # Remove fill | |
| line=dict( | |
| width=2, | |
| dash='solid' if i % 2 == 0 else 'dash', # Alternate solid and dashed lines | |
| ) | |
| ) | |
| # Optimize radar chart display | |
| fig.update_layout( | |
| polar=dict( | |
| radialaxis=dict( | |
| visible=True, | |
| range=[0, 100], | |
| showline=True, | |
| linewidth=1, | |
| gridcolor='lightgrey' | |
| ), | |
| angularaxis=dict( | |
| showline=True, | |
| linewidth=1, | |
| gridcolor='lightgrey' | |
| ) | |
| ), | |
| showlegend=True, | |
| legend=dict( | |
| yanchor="middle", | |
| y=0.5, | |
| xanchor="left", | |
| x=1.2, | |
| bgcolor="rgba(255, 255, 255, 0.8)", | |
| bordercolor="lightgrey", | |
| borderwidth=1 | |
| ), | |
| margin=dict(r=150), | |
| paper_bgcolor='white' | |
| ) | |
| if plot_type == "Radar Chart": | |
| # Reorganize data for radar chart | |
| radar_data = [] | |
| for model in df.index: | |
| for subset, score in df.loc[model].items(): | |
| radar_data.append({ | |
| model_column_name: model, # Use the stored column name | |
| 'Subset': subset, | |
| 'Score': score | |
| }) | |
| radar_df = pd.DataFrame(radar_data) | |
| colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', | |
| '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] | |
| # Create radar chart | |
| fig = px.line_polar(radar_df, | |
| r='Score', | |
| theta='Subset', | |
| color=model_column_name, # Use the stored column name | |
| line_close=True, | |
| color_discrete_sequence=colors, | |
| title='Model Performance Radar Chart') | |
| # Customize line styles for each model | |
| for i, trace in enumerate(fig.data): | |
| trace.update( | |
| fill=None, # Remove fill | |
| line=dict( | |
| width=2, | |
| dash='solid' if i % 2 == 0 else 'dash', # Alternate solid and dashed lines | |
| ) | |
| ) | |
| # Optimize radar chart display | |
| fig.update_layout( | |
| polar=dict( | |
| radialaxis=dict( | |
| visible=True, | |
| range=[0, 100], | |
| showline=True, | |
| linewidth=1, | |
| gridcolor='lightgrey' | |
| ), | |
| angularaxis=dict( | |
| showline=True, | |
| linewidth=1, | |
| gridcolor='lightgrey' | |
| ) | |
| ), | |
| showlegend=True, | |
| legend=dict( | |
| yanchor="middle", | |
| y=0.5, | |
| xanchor="left", | |
| x=1.2, | |
| bgcolor="rgba(255, 255, 255, 0.8)", | |
| bordercolor="lightgrey", | |
| borderwidth=1 | |
| ), | |
| margin=dict(r=150), | |
| paper_bgcolor='white' | |
| ) | |
| if plot_type == "Heatmap": | |
| # Create heatmap | |
| fig = px.imshow(df_transposed, | |
| labels=dict(x=model_column_name, y="Subset", color="Score"), # Use stored column name | |
| color_continuous_scale="RdYlBu_r", | |
| aspect="auto", | |
| title="Model Performance Heatmap") | |
| # Optimize heatmap display | |
| fig.update_layout( | |
| title=dict( | |
| text='Model Performance Distribution Across Subsets', | |
| x=0.5, | |
| y=0.95, | |
| xanchor='center', | |
| yanchor='top', | |
| font=dict(size=14) | |
| ), | |
| xaxis=dict( | |
| title=model_column_name, # Use stored column name | |
| tickangle=45, | |
| tickfont=dict(size=10), | |
| side="bottom" | |
| ), | |
| yaxis=dict( | |
| title="Subset", | |
| tickfont=dict(size=10) | |
| ), | |
| coloraxis=dict( | |
| colorbar=dict( | |
| title="Score", | |
| titleside="right", | |
| tickfont=dict(size=10), | |
| titlefont=dict(size=12), | |
| len=0.9, | |
| ) | |
| ), | |
| margin=dict(t=80, r=100, b=80, l=80), | |
| paper_bgcolor='white', | |
| plot_bgcolor='white' | |
| ) | |
| # Add value annotations | |
| annotations = [] | |
| for i in range(len(df_transposed.index)): | |
| for j in range(len(df_transposed.columns)): | |
| annotations.append( | |
| dict( | |
| x=j, | |
| y=i, | |
| text=f"{df_transposed.iloc[i, j]:.1f}", | |
| showarrow=False, | |
| font=dict(size=9, color='black') | |
| ) | |
| ) | |
| fig.update_layout(annotations=annotations) | |
| return fig | |
| # Sankey diagram of the recommended models | |
| def plot_recommendation_sankey(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio): | |
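| # Build a Sankey diagram from the recommendation JSON: root -> scenario -> model family -> specific model. | |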
| import plotly.graph_objects as go | |
| from plotly.colors import sample_colorscale | |
| _, recommendation_result = load_analysis_report(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio) | |
| # Define the node levels and color scheme | |
| levels = ['Model Recommendation', 'Scenario', 'Model Family', 'Specific Model'] | |
| color_scale = "RdYlBu_r" | |
| # Node and link data | |
| node_labels = [levels[0]] # root node | |
| customdata = ["Root node"] | |
| sources, targets, values = [], [], [] | |
| # Track node indices | |
| node_indices = {levels[0]: 0} | |
| current_idx = 1 | |
| # Walk the recommendation structure {"scenario 1": [ {model 1: reason 1}, {model 2: reason 2} ], ...} | |
| for scenario, model_dicts in recommendation_result.items(): | |
| # Add the scenario node | |
| scenario_label = " ".join(scenario.split()[:3]) + ("..." if len(scenario.split()) > 3 else "") | |
| node_labels.append(scenario_label) | |
| customdata.append(scenario) | |
| node_indices[f"scenario_{scenario}"] = current_idx | |
| current_idx += 1 | |
| # Link: root node -> scenario node | |
| sources.append(0) | |
| targets.append(node_indices[f"scenario_{scenario}"]) | |
| values.append(10) | |
| # Iterate over the model list [ {model 1: reason 1}, {model 2: reason 2} ] | |
| for model_dict in model_dicts: | |
| for model, reason in model_dict.items(): | |
| # Extract the model family (e.g. "GPT-4" -> "GPT") | |
| family = model.split('-')[0].split('_')[0] | |
| # Add the model family node (if it does not exist yet) | |
| if f"family_{family}" not in node_indices: | |
| node_labels.append(family) | |
| customdata.append(f"Model family: {family}") | |
| node_indices[f"family_{family}"] = current_idx | |
| current_idx += 1 | |
| # Link: scenario -> model family | |
| sources.append(node_indices[f"scenario_{scenario}"]) | |
| targets.append(node_indices[f"family_{family}"]) | |
| values.append(8) | |
| # Add the specific model node (if it does not exist yet) | |
| if f"model_{model}" not in node_indices: | |
| node_labels.append(model) | |
| customdata.append(f"<b>{model}</b><br>{reason}") | |
| node_indices[f"model_{model}"] = current_idx | |
| current_idx += 1 | |
| # Link: model family -> specific model | |
| sources.append(node_indices[f"family_{family}"]) | |
| targets.append(node_indices[f"model_{model}"]) | |
| values.append(5) | |
| # Generate colors (make sure the number of colors matches the number of nodes) | |
| node_colors = ["#2c7bb6"] # root node color | |
| node_colors += sample_colorscale(color_scale, [n/(len(node_labels)-1) for n in range(1, len(node_labels))]) | |
| # Create the Sankey diagram | |
| fig = go.Figure(go.Sankey( | |
| arrangement="perpendicular", | |
| node=dict( | |
| pad=20, | |
| thickness=15, | |
| line=dict(color="rgba(0,0,0,0.3)", width=0.2), | |
| label=node_labels, | |
| color=node_colors, | |
| hovertemplate='%{label}<extra></extra>', | |
| x=[0] + [0.33]*len([n for n in node_indices if n.startswith('scenario_')]) | |
| + [0.66]*len([n for n in node_indices if n.startswith('family_')]) | |
| + [1.0]*len([n for n in node_indices if n.startswith('model_')]), | |
| ), | |
| link=dict( | |
| source=sources, | |
| target=targets, | |
| value=values, | |
| color="rgba(180,180,180,0.4)", | |
| customdata=[customdata[t] for t in targets], | |
| hovertemplate='%{customdata}<extra></extra>' | |
| ) | |
| )) | |
| fig.update_layout( | |
| title_text="<b>Model Recommendation Flow</b>", | |
| font_size=11, | |
| height=700, | |
| margin=dict(t=80, l=20, r=20, b=20) | |
| ) | |
| return fig | |
| ### Gradio UI section ### | |
| # Custom CSS styles | |
| custom_css = """ | |
| <style> | |
| body { | |
| font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; | |
| background-color: #f9f9f9; | |
| } | |
| .gr-label { | |
| font-size: 15px; | |
| } | |
| .gr-button-primary { | |
| background-color: #4CAF50; | |
| color: white; | |
| border-radius: 8px; | |
| } | |
| .gr-tabs > .tab-nav { | |
| background-color: #e0e0e0; | |
| border-bottom: 2px solid #ccc; | |
| } | |
| .gr-tabs > .tab-nav button.selected { | |
| background-color: #ffffff !important; | |
| border-bottom: 2px solid #4CAF50; | |
| } | |
| .gr-panel { | |
| padding: 20px; | |
| border-radius: 10px; | |
| box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); | |
| background-color: #fff; | |
| } | |
| .markdown-title { | |
| font-size: 1.5em; | |
| font-weight: bold; | |
| margin-bottom: 10px; | |
| } | |
| .analysis-box { | |
| background-color: #f1f8ff; | |
| padding: 20px; | |
| border-left: 5px solid #4CAF50; | |
| border-radius: 6px; | |
| margin-top: 10px; | |
| } | |
| .recommendation-box { | |
| background-color: #fff3cd; | |
| padding: 20px; | |
| border-left: 5px solid #ff9800; | |
| border-radius: 6px; | |
| margin-top: 10px; | |
| } | |
| </style> | |
| """ | |
| SERVER_URL = "http://10.249.190.53:8000/upload" | |
| # Build the interface | |
| def update_dataset(task): | |
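| # Return the dataset choices that are available for the selected task type. | |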
| if task == "Code Generation": | |
| return gr.update(choices=["HumanEval", "MBPP"]) | |
| elif task== "Code Completion": | |
| return gr.update(choices=["ComplexCodeEval-Python","ComplexCodeEval-Java"]) | |
| elif task == "Api Recommendation": | |
| return gr.update(choices=["ComplexCodeEval-Python","ComplexCodeEval-Java"]) | |
| elif task == "Test Generation": | |
| return gr.update(choices=["ComplexCodeEval-Python","ComplexCodeEval-Java"]) | |
| with gr.Blocks(css=custom_css) as iface: | |
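| # Overall layout: a configuration column on the left, result tabs (ranking table, visualization, suggestions) on the right. | |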
| gr.HTML(""" | |
| <div style='text-align:center; padding:5px;'> | |
| <h1>Multi-view Code LLM Leaderboard</h1> | |
| <p>Multi-view Leaderboard: Towards Evaluating the Code Intelligence of LLMs From Multiple Views</p> | |
| </div> | |
| """) | |
| with gr.Row(): | |
| # Configuration controls | |
| with gr.Column(scale=1): | |
| task_type_radio = gr.Radio( | |
| ["Code Generation", "Code Completion", "Api Recommendation", "Test Generation"], | |
| label="Select Task Type", | |
| value="Code Generation" | |
| ) | |
| dataset_radio = gr.Radio( | |
| ["HumanEval", "MBPP",'ComplexCodeEval'], | |
| label="Select a dataset", | |
| value="HumanEval" | |
| ) | |
| num_parts_slider = gr.Slider( | |
| minimum=3, | |
| maximum=8, | |
| step=1, | |
| label="Choose the Number of Subsets", | |
| value=3 | |
| ) | |
| # Use a single radio instead of multiple checkboxes | |
| perspective_radio = gr.Radio( | |
| ["I - Num of Tokens in Problem Desc", | |
| "II - Num of Lines in Problem Desc", | |
| "III - Complexity of Reference Code", | |
| "IV - Problem Types"], | |
| label="Choose Perspective", | |
| value="I - Num of Tokens in Problem Desc" | |
| ) | |
| # Shared division-method radio | |
| division_method_radio = gr.Radio( | |
| ["Equal Frequency Partitioning", "Equal Interval Partitioning"], | |
| label="Choose the Division Method", | |
| visible=True | |
| ) | |
| confirm_btn = gr.Button("Confirm", variant="primary") | |
| # Main display area | |
| with gr.Column(scale=2): | |
| with gr.Tabs(): | |
| # Ranking table | |
| with gr.TabItem("Ranking Table"): | |
| data_table = gr.Dataframe(headers=["Model", "Score","Analysis"], | |
| interactive=True, | |
| datatype="html", # 指定第三列为HTML | |
| render=True, # 启用HTML渲染 | |
| ) | |
| # Visualization | |
| with gr.TabItem("Visualization"): | |
| plot_type = gr.Radio( | |
| choices=["Line Chart", "Radar Chart","Heatmap"], | |
| label="Select Plot Type", | |
| value="Line Chart" | |
| ) | |
| chart = gr.Plot() | |
| # AI analysis | |
| with gr.TabItem("Model selection suggestions"): | |
| with gr.Column(): | |
| # gr.Markdown("<h2 class='markdown-title'>🎯 Model Recommendation</h2>") | |
| recommendation_plot = gr.Plot() | |
| # #********************* Upload-results UI layout ****************** | |
| # with gr.TabItem("Upload inference result"): | |
| # print("new!!!!!!!!!!!!!!!!") | |
| # with gr.Column(scale=1): | |
| # upload_file = gr.File( | |
| # label="📤 上传JSON结果文件", | |
| # type="filepath", | |
| # file_types=[".json"], | |
| # height=100 | |
| # ) | |
| # task_choice = gr.Radio( | |
| # label="Select Evaluation Task", | |
| # choices=["Code Generation", "Code Completion", "Api Recommendation", "Test Generation"], | |
| # value="Code Generation" | |
| # ) | |
| # dataset_choice = gr.Radio( | |
| # ["HumanEval", "MBPP"], | |
| # label="Select a dataset", | |
| # value="HumanEval", | |
| # interactive=True | |
| # ) | |
| # task_choice.change(fn=update_dataset, inputs=task_choice, outputs=dataset_choice) | |
| # with gr.Column(scale=2): | |
| # # Status display area | |
| # status = gr.Textbox( | |
| # label="📊 Processing status", | |
| # interactive=False, | |
| # lines=4, | |
| # placeholder="等待文件上传..." | |
| # ) | |
| # # Action button area | |
| # with gr.Row(): | |
| # submit_btn = gr.Button("🚀 Submit to server", variant="primary") | |
| # clear_btn = gr.Button("🧹 Clear all") | |
| # Button actions | |
| # submit_btn.click( | |
| # fn=send_to_backend, | |
| # inputs=[upload_file, task_choice, dataset_choice], | |
| # outputs=status | |
| # ) | |
| # clear_btn.click( | |
| # fn=lambda: (None, "Code Generation", "HumanEval", "Status reset"), | |
| # inputs=None, | |
| # outputs=[upload_file, task_choice, dataset_choice, status] | |
| # ) | |
| # with gr.Column(scale=2): | |
| # status = gr.Textbox(label="Status") | |
| # submit_btn = gr.Button("Send to Server") | |
| # submit_btn.click(fn=send_to_backend, | |
| # inputs=[upload_file,task_choice, dataset_choice], | |
| # outputs=status | |
| # ) | |
| # Switch the dataset choices based on the task type | |
| def update_dataset_options(task_type): | |
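| # Same mapping as update_dataset: pick the datasets that belong to the selected task type. | |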
| if task_type == "Code Generation": | |
| return gr.update(choices=["HumanEval", "MBPP"]) | |
| elif task_type == "Code Completion": | |
| return gr.update(choices=["ComplexCodeEval-Python","ComplexCodeEval-Java"]) | |
| elif task_type == "Api Recommendation": | |
| return gr.update(choices=["ComplexCodeEval-Python","ComplexCodeEval-Java"]) | |
| elif task_type == "Test Generation": | |
| return gr.update(choices=["ComplexCodeEval-Python","ComplexCodeEval-Java"]) | |
| # Switch the splitting perspectives based on the dataset | |
| def update_perspective_options(task,dataset): | |
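| # Restrict the perspective choices to the ones that exist for the selected dataset/task combination. | |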
| if dataset == "MBPP": | |
| return gr.update(choices=[ | |
| "I - Num of Tokens in Problem Desc", | |
| "III - Complexity of Reference Code", | |
| "IV - Problem Types" | |
| ]) | |
| elif dataset =="HumanEval": | |
| return gr.update(choices=[ | |
| "I - Num of Tokens in Problem Desc", | |
| "II - Num of Lines in Problem Desc", | |
| "III - Complexity of Reference Code", | |
| "IV - Problem Types" | |
| ]) | |
| elif task == "Api Recommendation": | |
| return gr.update(choices=[ | |
| "I - Num of Tokens in Problem Desc(Eval Metric:Recall)", | |
| "II - Num of Tokens in Problem Desc(Eval Metric:F1)", | |
| "III - Num of Lines in Problem Desc(Eval Metric:Recall)", | |
| "IV - Num of Lines in Problem Desc(Eval Metric:f1)" | |
| ]) | |
| elif task == "Code Completion" or "Test Generation": | |
| return gr.update(choices=[ | |
| "I - Num of Tokens in Problem Desc(Eval Metric:ES)", | |
| "II - Num of Lines in Problem Desc(Eval Metric:ES)" | |
| ]) | |
| dataset_radio.change( | |
| fn=update_perspective_options, | |
| inputs=[task_type_radio,dataset_radio], | |
| outputs=perspective_radio | |
| ) | |
| # Bind events | |
| confirm_btn.click( | |
| fn=on_confirm, | |
| inputs=[task_type_radio,dataset_radio, num_parts_slider, perspective_radio, division_method_radio], | |
| outputs=data_table | |
| ).then( | |
| fn=load_analysis_report, | |
| inputs=[task_type_radio,dataset_radio, num_parts_slider, perspective_radio, division_method_radio], | |
| outputs=[gr.State()] | |
| ).then( | |
| fn=plot_visualization, | |
| inputs=[task_type_radio,dataset_radio, num_parts_slider, perspective_radio, division_method_radio,plot_type], | |
| outputs=chart | |
| ).then( | |
| fn=plot_recommendation_sankey, | |
| inputs=[task_type_radio,dataset_radio, num_parts_slider, perspective_radio, division_method_radio], | |
| outputs=[recommendation_plot] # note: this is a list | |
| ) | |
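| # Re-render the chart whenever the plot type changes. | |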
| plot_type.change( | |
| fn=plot_visualization, | |
| inputs=[task_type_radio,dataset_radio, num_parts_slider, perspective_radio, division_method_radio,plot_type], | |
| outputs=chart | |
| ) | |
| task_type_radio.change( | |
| fn=update_dataset_options, | |
| inputs=task_type_radio, | |
| outputs=dataset_radio | |
| ) | |
| # Launch the interface | |
| iface.launch() |