import gradio as gr import pandas as pd import requests import os import shutil import json import pandas as pd import subprocess import plotly.express as px def on_confirm(dataset_radio, num_parts_dropdown, token_counts_radio, line_counts_radio, cyclomatic_complexity_radio, problem_type_checkbox): # 根据用户选择的参数构建文件路径 num_parts = num_parts_dropdown # token_counts_split = token_counts_radio # line_counts_split = line_counts_radio # cyclomatic_complexity_split = cyclomatic_complexity_radio # 读取数据 dataframes = [] if dataset_radio == "HumanEval": if token_counts_radio=="Equal Frequency Partitioning":#等频划分,每个子集数据点的数量基本一致 token_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets/{num_parts}/QS/token_counts_QS.csv") dataframes.append(token_counts_df) if token_counts_radio=="Equal Interval Partitioning": token_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets/{num_parts}/EI/token_counts_EI.csv") dataframes.append(token_counts_df) if line_counts_radio=="Equal Frequency Partitioning":#等频划分,每个子集数据点的数量基本一致 line_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets/{num_parts}/QS/line_counts_QS.csv") dataframes.append(line_counts_df) if token_counts_radio=="Equal Interval Partitioning": line_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets/{num_parts}/EI/line_counts_EI.csv") dataframes.append(line_counts_df) if cyclomatic_complexity_radio=="Equal Frequency Partitioning":#等频划分,每个子集数据点的数量基本一致 CC_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets/{num_parts}/QS/CC_QS.csv") dataframes.append(CC_df) if token_counts_radio=="Equal Interval Partitioning": CC_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets/{num_parts}/EI/CC_EI.csv") dataframes.append(CC_df) #以下改为直接从一个划分文件中读取即可 if problem_type_checkbox: problem_type_df = pd.read_csv("/home/user/app/dividing_into_different_subsets/cata_result.csv") dataframes.append(problem_type_df) if dataset_radio == "MBPP": if token_counts_radio=="Equal Frequency Partitioning":#等频划分,每个子集数据点的数量基本一致 token_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets_mbpp/{num_parts}/QS/token_counts_QS.csv") dataframes.append(token_counts_df) if token_counts_radio=="Equal Interval Partitioning": token_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets_mbpp/{num_parts}/EI/token_counts_EI.csv") dataframes.append(token_counts_df) if line_counts_radio=="Equal Frequency Partitioning":#等频划分,每个子集数据点的数量基本一致 line_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets_mbpp/{num_parts}/QS/line_counts_QS.csv") dataframes.append(line_counts_df) if token_counts_radio=="Equal Interval Partitioning": line_counts_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets_mbpp/{num_parts}/EI/line_counts_EI.csv") dataframes.append(line_counts_df) if cyclomatic_complexity_radio=="Equal Frequency Partitioning":#等频划分,每个子集数据点的数量基本一致 CC_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets_mbpp/{num_parts}/QS/CC_QS.csv") dataframes.append(CC_df) if token_counts_radio=="Equal Interval Partitioning": CC_df = pd.read_csv(f"/home/user/app/dividing_into_different_subsets_mbpp/{num_parts}/EI/CC_EI.csv") dataframes.append(CC_df) #以下改为直接从一个划分文件中读取即可 if problem_type_checkbox: problem_type_df = pd.read_csv("/home/user/app/dividing_into_different_subsets_mbpp/cata_result.csv") dataframes.append(problem_type_df) # 如果所有三个radio都有value,将三个文件中的所有行拼接 if len(dataframes) > 0: combined_df = dataframes[0] for df in dataframes[1:]: combined_df = pd.merge(combined_df, df, left_index=True, right_index=True, suffixes=('', '_y')) combined_df = combined_df.loc[:, ~combined_df.columns.str.endswith('_y')] # 去除重复的列 return combined_df else: return pd.DataFrame() def execute_specified_python_files(directory_list, file_list): for directory in directory_list: for py_file in file_list: file_path = os.path.join(directory, py_file) if os.path.isfile(file_path) and py_file.endswith('.py'): print(f"Executing {file_path}...") try: # 使用subprocess执行Python文件 subprocess.run(['python', file_path], check=True) print(f"{file_path} executed successfully.") except subprocess.CalledProcessError as e: print(f"Error executing {file_path}: {e}") else: print(f"File {file_path} does not exist or is not a Python file.") # 定义一个函数来生成 CSS 样式 def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low): css = """ #dataframe th { background-color: #f2f2f2 } """ colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"] categories = [line_counts, token_counts, cyclomatic_complexity] category_index = 0 column_index = 1 for category in categories: if category: if show_high: css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n" column_index += 1 if show_medium: css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n" column_index += 1 if show_low: css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n" column_index += 1 category_index += 1 # 为 Problem Type 相关的三个子列设置固定颜色 if problem_type: problem_type_color = "#d4f0fc" # 你可以选择任何你喜欢的颜色 css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {problem_type_color}; }}\n" css += f"#dataframe td:nth-child({column_index + 2}) {{ background-color: {problem_type_color}; }}\n" css += f"#dataframe td:nth-child({column_index + 3}) {{ background-color: {problem_type_color}; }}\n" # 隐藏 "data" 标识 css += """ .gradio-container .dataframe-container::before { content: none !important; } """ return css def update_radio_options(token_counts, line_counts, cyclomatic_complexity, problem_type): options = [] if token_counts: options.append("The Number of Tokens in Problem Descriptions") if line_counts: options.append("The Number of Lines in Problem Descriptions") if cyclomatic_complexity: options.append("The Complexity of Reference Code") if problem_type: options.append("Problem Type") return gr.update(choices=options) def plot_csv(dataset_radio,radio,num): print(dataset_radio,radio) if dataset_radio=="HumanEval": if radio=="The Number of Tokens in Problem Descriptions": radio_choice="token_counts" file_path = f'/home/user/app/dividing_into_different_subsets/{num}/QS/{radio_choice}_QS.csv' elif radio=="The Number of Lines in Problem Descriptions": radio_choice="line_counts" file_path = f'/home/user/app/dividing_into_different_subsets/{num}/QS/{radio_choice}_QS.csv' elif radio=="The Complexity of Reference Code": radio_choice="CC" file_path = f'/home/user/app/dividing_into_different_subsets/{num}/QS/{radio_choice}_QS.csv' elif radio=="Problem Type": radio_choice="problem_type" file_path = f'/home/user/app/dividing_into_different_subsets/cata_result.csv' print("test!") elif dataset_radio=="MBPP": if radio=="The Number of Tokens in Problem Descriptions": radio_choice="token_counts" file_path = f'/home/user/app/dividing_into_different_subsets_mbpp/{num}/QS/{radio_choice}_QS.csv' elif radio=="The Number of Lines in Problem Descriptions": radio_choice="line_counts" file_path = f'/home/user/app/dividing_into_different_subsets_mbpp/{num}/QS/{radio_choice}_QS.csv' elif radio=="The Complexity of Reference Code": radio_choice="CC" file_path = f'/home/user/app/dividing_into_different_subsets_mbpp/{num}/QS/{radio_choice}_QS.csv' elif radio=="Problem Type": radio_choice="problem_type" file_path = f'/home/user/app/dividing_into_different_subsets_mbpp/cata_result.csv' print("test!") # file_path="E:/python-testn/pythonProject3/hh_1/dividing_into_different_subsets/3/QS/CC_QS.csv" df = pd.read_csv(file_path) # 将第一列作为索引 df.set_index('Model', inplace=True) # 转置数据框,使得模型作为列,横轴作为行 df_transposed = df.T # 使用plotly绘制折线图 fig = px.line(df_transposed, x=df_transposed.index, y=df_transposed.columns, title='Model Evaluation Results', labels={'value': 'Evaluation Score', 'index': 'Evaluation Metric'}, color_discrete_sequence=px.colors.qualitative.Plotly) # 设置悬停效果 fig.update_traces(hovertemplate='%{y}') return fig def toggle_radio(checkbox, radio): return gr.update(visible=checkbox) def toggle_line_counts_visibility(dataset): if dataset == "MBPP": return gr.update(visible=False) else: return gr.update(visible=True) # 创建 Gradio 界面 import gradio as gr with gr.Blocks() as iface: gr.HTML(""" """) with gr.Tabs() as tabs: with gr.TabItem("Evaluation Result"): with gr.Row(): with gr.Column(scale=2): with gr.Row(): with gr.Column(): dataset_radio = gr.Radio(["HumanEval", "MBPP"], label="Select Dataset ") with gr.Row(): custom_css = """ """ with gr.Column(): gr.Markdown( f"{custom_css}