"""Gradio app showing per-subset evaluation results for HumanEval and MBPP.

The "Evaluation Result" tab offers a ranking table (merged from pre-computed
CSV files) and a line chart of one selected division perspective.
"""
import json
import os
import shutil
import subprocess
import sys

import gradio as gr
import pandas as pd
import plotly.express as px
import requests

# Root directory of the pre-computed subset results for each dataset.
_DATASET_ROOTS = {
    "HumanEval": "/home/user/app/dividing_into_different_subsets",
    "MBPP": "/home/user/app/dividing_into_different_subsets_mbpp",
}

# Partitioning method (as labelled in the UI) -> directory / file-name suffix.
_PARTITION_SUFFIXES = {
    "Equal Frequency Partitioning": "QS",
    "Equal Interval Partitioning": "EI",
}

# Line-chart perspective (as labelled in the UI) -> CSV file stem.
_PERSPECTIVE_FILE_STEMS = {
    "The Number of Tokens in Problem Descriptions": "token_counts",
    "The Number of Lines in Problem Descriptions": "line_counts",
    "The Complexity of Reference Code": "CC",
}
_PROBLEM_TYPE_PERSPECTIVE = "Problem Type"


def on_confirm(dataset_radio, num_parts_dropdown, token_counts_radio,
               line_counts_radio, cyclomatic_complexity_radio,
               problem_type_checkbox):
    """Build the ranking table for the current selection.

    Args:
        dataset_radio: "HumanEval" or "MBPP"; any other value (including
            ``None``) yields an empty table.
        num_parts_dropdown: Number of subsets; used as a path component.
        token_counts_radio: Partitioning method for the token-count
            perspective, or a value that is not a known method to skip it.
        line_counts_radio: Same, for the line-count perspective.
        cyclomatic_complexity_radio: Same, for the cyclomatic-complexity
            perspective.
        problem_type_checkbox: Truthy to include the problem-type results.

    Returns:
        A ``pandas.DataFrame`` joining every selected CSV, or an empty
        DataFrame when nothing is selected.

    Raises:
        FileNotFoundError: If a selected CSV does not exist on disk.
    """
    root = _DATASET_ROOTS.get(dataset_radio)
    if root is None:
        return pd.DataFrame()

    # Each perspective is driven by ITS OWN radio.  The previous code tested
    # token_counts_radio in the line-count and complexity "Equal Interval"
    # branches, which loaded the wrong (and sometimes duplicate) files.
    selections = (
        ("token_counts", token_counts_radio),
        ("line_counts", line_counts_radio),
        ("CC", cyclomatic_complexity_radio),
    )

    dataframes = []
    for stem, method in selections:
        suffix = _PARTITION_SUFFIXES.get(method)
        if suffix is None:
            continue
        dataframes.append(pd.read_csv(
            f"{root}/{num_parts_dropdown}/{suffix}/{stem}_{suffix}.csv"))

    # Problem-type results live in a single file, independent of the number
    # of subsets and of the partitioning method.
    if problem_type_checkbox:
        dataframes.append(pd.read_csv(f"{root}/cata_result.csv"))

    if not dataframes:
        return pd.DataFrame()

    # NOTE(review): the join is on the positional row index, so it presumably
    # relies on every CSV listing the models in the same order -- confirm.
    combined_df = dataframes[0]
    for df in dataframes[1:]:
        combined_df = pd.merge(combined_df, df, left_index=True,
                               right_index=True, suffixes=('', '_y'))
        # Drop the duplicated columns contributed by the right-hand frame.
        combined_df = combined_df.loc[
            :, ~combined_df.columns.str.endswith('_y')]
    return combined_df


def execute_specified_python_files(directory_list, file_list):
    """Run each named ``.py`` file found in each directory.

    Every (directory, file) combination is tried.  Missing files and
    non-Python files are reported on stdout and skipped; a script that exits
    with a non-zero status is reported and does not stop the remaining runs.

    Args:
        directory_list: Iterable of directory paths to look in.
        file_list: Iterable of file names to execute.
    """
    for directory in directory_list:
        for py_file in file_list:
            file_path = os.path.join(directory, py_file)
            if not (os.path.isfile(file_path) and py_file.endswith('.py')):
                print(f"File {file_path} does not exist or is not a Python file.")
                continue

            print(f"Executing {file_path}...")
            try:
                # Use the interpreter running this app rather than whatever
                # "python" happens to resolve to on PATH.
                subprocess.run([sys.executable, file_path], check=True)
            except subprocess.CalledProcessError as e:
                print(f"Error executing {file_path}: {e}")
            else:
                print(f"{file_path} executed successfully.")


def generate_css(line_counts, token_counts, cyclomatic_complexity,
                 problem_type, show_high, show_medium, show_low):
    """Generate CSS that colours the table columns by perspective.

    Args:
        line_counts: Truthy if the line-count perspective is displayed.
        token_counts: Truthy if the token-count perspective is displayed.
        cyclomatic_complexity: Truthy if the complexity perspective is
            displayed.
        problem_type: Truthy if the three problem-type columns are displayed.
        show_high: Truthy if the first sub-column of each perspective is shown.
        show_medium: Truthy if the second sub-column is shown.
        show_low: Truthy if the third sub-column is shown.

    Returns:
        A CSS string targeting the element with id ``dataframe``.
    """
    css = """
    #dataframe th {
        background-color: #f2f2f2
    }
    """

    colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    categories = [line_counts, token_counts, cyclomatic_complexity]

    # Column 1 is left unstyled; coloured columns start after it.
    column_index = 1

    # NOTE(review): each perspective keeps a fixed colour slot whether or not
    # it is displayed; the collapsed source does not show whether the colour
    # index was meant to advance only for displayed perspectives -- confirm.
    for color, category in zip(colors, categories):
        if not category:
            continue
        for shown in (show_high, show_medium, show_low):
            if shown:
                css += (f"#dataframe td:nth-child({column_index + 1}) "
                        f"{{ background-color: {color}; }}\n")
                column_index += 1

    # The three problem-type sub-columns share one fixed colour.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in (1, 2, 3):
            css += (f"#dataframe td:nth-child({column_index + offset}) "
                    f"{{ background-color: {problem_type_color}; }}\n")

    # Hide the "data" marker.
    css += """
    .gradio-container .dataframe-container::before {
        content: none !important;
    }
    """
    return css


def update_radio_options(token_counts, line_counts, cyclomatic_complexity,
                         problem_type):
    """Return an update listing the perspectives whose checkbox is ticked.

    Args:
        token_counts: State of the token-count checkbox.
        line_counts: State of the line-count checkbox.
        cyclomatic_complexity: State of the complexity checkbox.
        problem_type: State of the problem-type checkbox.

    Returns:
        ``gr.update(choices=...)`` with the labels of the ticked perspectives.
    """
    labelled_flags = (
        (token_counts, "The Number of Tokens in Problem Descriptions"),
        (line_counts, "The Number of Lines in Problem Descriptions"),
        (cyclomatic_complexity, "The Complexity of Reference Code"),
        (problem_type, "Problem Type"),
    )
    options = [label for flag, label in labelled_flags if flag]
    return gr.update(choices=options)


def plot_csv(dataset_radio, radio, num):
    """Plot one perspective's results as a line chart, one line per model.

    Args:
        dataset_radio: "HumanEval" or "MBPP".
        radio: Label of the selected perspective.
        num: Number of subsets; used as a path component (ignored for
            "Problem Type").

    Returns:
        A plotly figure, or ``None`` when the dataset or perspective is unset
        or unrecognised (previously this raised ``UnboundLocalError``).

    Raises:
        FileNotFoundError: If the resolved CSV does not exist on disk.
    """
    root = _DATASET_ROOTS.get(dataset_radio)
    if root is None:
        return None

    if radio == _PROBLEM_TYPE_PERSPECTIVE:
        file_path = f'{root}/cata_result.csv'
    elif radio in _PERSPECTIVE_FILE_STEMS:
        radio_choice = _PERSPECTIVE_FILE_STEMS[radio]
        # NOTE(review): the chart always reads the equal-frequency ("QS")
        # files, regardless of the partitioning method chosen for the table.
        file_path = f'{root}/{num}/QS/{radio_choice}_QS.csv'
    else:
        return None

    df = pd.read_csv(file_path)
    # Use the "Model" column as the index.
    df.set_index('Model', inplace=True)
    # Transpose so that models become columns and the original columns
    # become the x axis.
    df_transposed = df.T

    fig = px.line(df_transposed, x=df_transposed.index,
                  y=df_transposed.columns,
                  title='Model Evaluation Results',
                  labels={'value': 'Evaluation Score',
                          'index': 'Evaluation Metric'},
                  color_discrete_sequence=px.colors.qualitative.Plotly)
    # Show only the y value on hover.
    fig.update_traces(hovertemplate='%{y}')
    return fig


def toggle_radio(checkbox, radio):
    """Show or hide a radio group to mirror its checkbox.

    Args:
        checkbox: Current checkbox state; becomes the radio's visibility.
        radio: Unused; kept so existing callers keep working.

    Returns:
        ``gr.update(visible=checkbox)``.
    """
    return gr.update(visible=checkbox)


def toggle_line_counts_visibility(dataset):
    """Hide the line-count checkbox for MBPP and show it otherwise."""
    return gr.update(visible=dataset != "MBPP")


# Build the Gradio interface.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from statement order only -- confirm against the rendered page.
with gr.Blocks() as iface:
    gr.HTML(""" """)

    with gr.Tabs() as tabs:
        with gr.TabItem("Evaluation Result"):
            with gr.Row():
                with gr.Column(scale=2):
                    with gr.Row():
                        with gr.Column():
                            dataset_radio = gr.Radio(
                                ["HumanEval", "MBPP"],
                                label="Select Dataset ")

                    with gr.Row():
                        custom_css = """ """

                        with gr.Column():
                            gr.Markdown(
                                f"{custom_css}\nChoose Division Perspective\n")
                            token_counts_checkbox = gr.Checkbox(
                                label="I-The Number of Tokens in Problem Descriptions")
                            line_counts_checkbox = gr.Checkbox(
                                label="II-The Number of Lines in Problem Descriptions")
                            dataset_radio.change(
                                fn=toggle_line_counts_visibility,
                                inputs=dataset_radio,
                                outputs=line_counts_checkbox)
                            cyclomatic_complexity_checkbox = gr.Checkbox(
                                label="III-The Complexity of Reference Code")
                            problem_type_checkbox = gr.Checkbox(
                                label="IV-Problem Types ")

                        css_code = """
                        .dropdown-container {
                            display: none;
                        }
                        """

                        with gr.Column():
                            # gr.Markdown("Choose Subsets")
                            num_parts_dropdown = gr.Dropdown(
                                choices=[0, 3, 4, 5, 6, 7, 8],
                                label="Choose the Number of Subsets",
                                value="")

                    with gr.Row():
                        with gr.Column():
                            token_counts_radio = gr.Radio(
                                ["Equal Frequency Partitioning",
                                 "Equal Interval Partitioning"],
                                label="Choose the Division Method for Perspective-I",
                                visible=False)
                        with gr.Column():
                            line_counts_radio = gr.Radio(
                                ["Equal Frequency Partitioning",
                                 "Equal Interval Partitioning"],
                                label="Choose the Division Method for Perspective-II",
                                visible=False)
                        with gr.Column():
                            cyclomatic_complexity_radio = gr.Radio(
                                ["Equal Frequency Partitioning",
                                 "Equal Interval Partitioning"],
                                label="Choose the Division Method for Perspective-III",
                                visible=False)

                    # Each method radio is visible only while its
                    # perspective checkbox is ticked.
                    token_counts_checkbox.change(
                        fn=lambda x: toggle_radio(x, token_counts_radio),
                        inputs=token_counts_checkbox,
                        outputs=token_counts_radio)
                    line_counts_checkbox.change(
                        fn=lambda x: toggle_radio(x, line_counts_radio),
                        inputs=line_counts_checkbox,
                        outputs=line_counts_radio)
                    cyclomatic_complexity_checkbox.change(
                        fn=lambda x: toggle_radio(x, cyclomatic_complexity_radio),
                        inputs=cyclomatic_complexity_checkbox,
                        outputs=cyclomatic_complexity_radio)

                with gr.Tabs() as inner_tabs:
                    with gr.TabItem("Ranking Table"):
                        dataframe_output = gr.Dataframe(elem_id="dataframe")
                        css_output = gr.HTML()
                        confirm_button = gr.Button("Confirm ")
                        confirm_button.click(
                            fn=on_confirm,
                            inputs=[dataset_radio, num_parts_dropdown,
                                    token_counts_radio, line_counts_radio,
                                    cyclomatic_complexity_radio,
                                    problem_type_checkbox],
                            outputs=dataframe_output)

                    with gr.TabItem("Line chart"):
                        select_radio = gr.Radio(
                            choices=[], label="Select One Perpective")
                        checkboxes = [token_counts_checkbox,
                                      line_counts_checkbox,
                                      cyclomatic_complexity_checkbox,
                                      problem_type_checkbox]
                        # Any checkbox change refreshes the list of
                        # perspectives that can be plotted.
                        for checkbox in checkboxes:
                            checkbox.change(fn=update_radio_options,
                                            inputs=checkboxes,
                                            outputs=select_radio)
                        select_radio.change(
                            fn=plot_csv,
                            inputs=[dataset_radio, select_radio,
                                    num_parts_dropdown],
                            outputs=gr.Plot(label="Line Plot "))

        # TODO: disabled upload tab; `generate_file` is not defined in this file.
        # with gr.TabItem("Upload Inference File"):
        #     gr.Markdown("Upload a JSON file")
        #     with gr.Row():
        #         with gr.Column():
        #             string_input = gr.Textbox(label="Enter the Model Name")
        #             number_input = gr.Number(label="Select the Number of Samples")
        #             dataset_choice = gr.Dropdown(label="Select Dataset", choices=["HumanEval", "MBPP"])
        #         with gr.Column():
        #             file_input = gr.File(label="Upload Generation Result in JSON file")
        #             upload_button = gr.Button("Confirm and Upload")
        #     json_output = gr.JSON(label="")
        #     upload_button.click(fn=generate_file, inputs=[file_input, string_input, number_input, dataset_choice],
        #                         outputs=json_output)

    # The stray closing brace that followed this rule has been removed.
    css = """
    #scale1 {
        border: 1px solid rgba(0, 0, 0, 0.2);
        padding: 10px;
        border-radius: 8px;
        background-color: #f9f9f9;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    }
    """
    # The literal here was empty in the reviewed source, leaving `css`
    # unused; wrap it in a <style> tag so the rule is actually emitted.
    gr.HTML(f"<style>{css}</style>")

    # Initialise the data table (disabled; `show_data` is not defined here).
    # initial_df = show_data(False, False, False, False, False, False, False)
    # initial_css = generate_css(False, False, False, False, True, False, False)
    # dataframe_output.value = initial_df
    # css_output.value = f""

# Launch the interface.
iface.launch()