"""Turn the leaderboard spreadsheet into the frames used for plots and tables."""

import copy

import numpy as np
import pandas as pd

# Tasks scored by the "<task>-F1" column.
_F1_TASKS = ("AR", "ER", "CR", "CFM", "SCM", "CP", "PTP", "CTP", "LQA")
# Tasks scored by the "<task>-Acc" column.
_ACC_TASKS = ("NER",)
# Tasks scored by the mean of "<task>-ROUGE-1", "-ROUGE-2" and "-ROUGE-L".
_ROUGE_TASKS = ("JRG", "LC", "JS", "CU", "JRG-TAG", "LC-TAG")
# Column order of the frame returned by process_plot_data.
_PLOT_COLUMNS = [
    "Model", "Domain",
    "AR", "ER", "NER", "JS", "CR",
    "CFM", "SCM", "CP", "PTP", "CTP", "LQA",
    "JRG", "CU", "LC",
    "JRG-TAG", "LC-TAG",
]


def _read_leaderboard() -> pd.DataFrame:
    """Read columns A:BE of the first 18 data rows of 'Sheet2' in leaderboard.xlsx."""
    return pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18,
    )


def process_plot_data(df: pd.DataFrame, flag: bool = False) -> pd.DataFrame:
    """Reduce the raw metric columns to a single score per task.

    "Model" and "Domain" are copied over unchanged; every other input column
    is dropped. Each task column of the result is derived as follows:

    * F1 tasks: ``<task>-F1 * 100``
    * NER: ``NER-Acc * 100``
    * ROUGE tasks: mean of ROUGE-1 / ROUGE-2 / ROUGE-L times 100. A row whose
      ``<task>-ROUGE-1`` cell is the placeholder ``'-'`` keeps ``'-'``.

    Args:
        df: Raw leaderboard frame containing "Model", "Domain" and the metric
            columns named above.
        flag: When true, the result is also written to ``scores.xlsx``
            (sheet "Sheet1", without the index). The argument used to be
            overwritten with ``True`` inside the function, so the file was
            written on every call; it is now honoured as passed.

    Returns:
        A new frame with the columns "Model", "Domain" and one column per
        task, in a fixed order.

    Raises:
        KeyError: If a required input column is missing.
    """
    scores = df[["Model", "Domain"]].copy()

    for task in _PLOT_COLUMNS[2:]:
        if task in _F1_TASKS:
            scores[task] = df[f"{task}-F1"] * 100
        elif task in _ACC_TASKS:
            scores[task] = df[f"{task}-Acc"] * 100
        elif task in _ROUGE_TASKS:
            rouge_cols = [f"{task}-ROUGE-{kind}" for kind in ("1", "2", "L")]
            # '-' marks a missing score; make it NaN so the row mean is numeric.
            rouge_mean = (
                df[rouge_cols].replace('-', np.nan).astype(float).mean(axis=1)
            )
            missing = df[f"{task}-ROUGE-1"] == '-'
            # Build the column in one step: scaled mean where a score exists,
            # the '-' placeholder where ROUGE-1 was marked missing.
            scores[task] = (rouge_mean * 100).where(~missing, '-')

    # reindex returns a new frame, so the result has to be kept.
    scores = scores.reindex(columns=_PLOT_COLUMNS)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            scores.to_excel(writer, sheet_name="Sheet1", index=False)

    return scores


def plot_data():
    """Load the leaderboard and return the per-category score frames for plotting.

    Returns:
        A dict mapping the category title to a frame holding "Model" followed
        by the task scores of that category. "Overall" holds the tasks of all
        three categories.
    """
    # Missing cells are left as NaN on purpose: process_plot_data multiplies
    # score columns by 100, which would turn a '-' placeholder string into a
    # repeated string instead of a number.
    df = process_plot_data(_read_leaderboard())

    # Positions refer to the output of process_plot_data:
    # 0 = Model, 1 = Domain, 2..6 = AR..CR, 7..12 = CFM..LQA, 13..15 = JRG, CU, LC.
    df_BIR = df.iloc[:, [0] + list(range(2, 7))]
    df_LFI = df.iloc[:, [0] + list(range(7, 13))]
    df_CLA = df.iloc[:, [0] + list(range(13, 16))]
    # Overall is the union of the three categories. The upper bound used to be
    # 15, which left out the last task column (LC, position 15).
    df_overall = df.iloc[:, [0] + list(range(2, 16))]

    return {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }


def tab_data():
    """Load the leaderboard and return the per-category raw metric tables.

    Missing cells are shown as '-'.

    Returns:
        A dict mapping the category title to a frame with the first two sheet
        columns followed by that category's metric columns. "Overall" holds
        the first 56 sheet columns.
    """
    # fillna returns a new frame; keep it so empty cells become '-'.
    leaderboard_df = _read_leaderboard().fillna("-")

    # NOTE(review): a row filter for the 'Baichuan-7B' model existed here only
    # as disabled code, so every model row is kept. Confirm that is intended.

    lead_cols = list(range(0, 2))
    df_BIR = leaderboard_df.iloc[:, list(range(0, 18))]
    df_LFI = leaderboard_df.iloc[:, lead_cols + list(range(18, 42))]
    df_CLA = leaderboard_df.iloc[:, lead_cols + list(range(42, 56))]
    df_overall = leaderboard_df.iloc[:, list(range(0, 56))]

    return {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()