Spaces:
Sleeping
Sleeping
import copy | |
import numpy as np | |
import pandas as pd | |
def _percent_score(metrics):
    """Return one 0-100 score per row from a task's metric column(s).

    Args:
        metrics: DataFrame holding the metric column(s) of a single task.
            Cells may contain the '-' placeholder instead of a number.

    Returns:
        A Series with the row-wise mean of the numeric cells times 100.
        Rows whose first metric column is '-' carry '-' instead of a number.
    """
    # Only the first column decides whether a row is reported as missing.
    missing = metrics.iloc[:, 0] == '-'
    # Convert placeholders to NaN *before* any arithmetic: multiplying the
    # string '-' by 100 would repeat it rather than raise.
    numeric = metrics.replace('-', np.nan).apply(pd.to_numeric)
    score = numeric.mean(axis=1) * 100
    if missing.any():
        score = score.astype(object)
        score[missing] = '-'
    return score


def process_plot_data(df, flag=False):
    """Collapse the raw leaderboard metrics into one 0-100 score per task.

    Args:
        df: Raw leaderboard frame. It must contain "Model" and "Domain",
            a "<task>-F1" column for AR, ER, CR, CFM, SCM, CP, PTP, CTP and
            LQA, a "NER-Acc" column, and "<task>-ROUGE-1/-ROUGE-2/-ROUGE-L"
            columns for JS, JRG, CU, LC, JRG-TAG and LC-TAG.
        flag: When true, additionally write the result to 'scores.xlsx'
            (sheet "Sheet1") in the current working directory.

    Returns:
        A new DataFrame with "Model", "Domain" and one score column per task,
        in the order of ``columns_names``. F1/Acc tasks are the metric times
        100; ROUGE tasks are the mean of the three ROUGE values times 100.
        Cells whose source value is the '-' placeholder stay '-'.

    Raises:
        KeyError: If an expected column is missing from ``df``.
    """
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
                     "CP", "PTP", "CTP", "LQA", "JRG", "CU", "LC", "JRG-TAG", "LC-TAG"]
    f1_tasks = {"AR", "ER", "CR", "CFM", "SCM", "CP", "PTP", "CTP", "LQA"}
    rouge_tasks = {"JRG", "LC", "JS", "CU", "JRG-TAG", "LC-TAG"}

    # Keep only the identifying columns; every score column is derived below.
    df2 = df[["Model", "Domain"]].copy()

    for col in columns_names[2:]:
        if col in f1_tasks:
            df2[col] = _percent_score(df[[f"{col}-F1"]])
        elif col == "NER":
            df2[col] = _percent_score(df[[f"{col}-Acc"]])
        elif col in rouge_tasks:
            df2[col] = _percent_score(
                df[[f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]])

    # reindex returns a new frame, so the result has to be kept.
    df2 = df2.reindex(columns=columns_names)

    # The export is opt-in: `flag` is honoured exactly as passed by the caller.
    if flag:
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)
    return df2
def plot_data():
    """Load the leaderboard sheet and build the per-category score frames.

    Reads the first 18 data rows of columns A:BE from sheet "Sheet2" of
    'leaderboard.xlsx', reduces them with ``process_plot_data`` and slices
    the result by column position.

    Returns:
        A dict mapping the category name ("Overall", "Basic Information
        Retrieval", "Legal Foundation Inference", "Complex Legal
        Application") to a DataFrame made of the "Model" column plus that
        category's task score columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)

    # Empty cells are left as NaN so that the metric arithmetic in
    # process_plot_data operates on numeric data.
    # flag=True requests the scores.xlsx export explicitly.
    df = process_plot_data(leaderboard_df, flag=True)

    # Positions refer to the frame returned by process_plot_data:
    # 0 = Model, 1 = Domain, 2.. = task scores.
    df_BIR = df.iloc[:, [0, *range(2, 7)]]
    df_LFI = df.iloc[:, [0, *range(7, 13)]]
    df_CLA = df.iloc[:, [0, *range(13, 16)]]
    # NOTE(review): this stops at position 14, so the column at position 15
    # (the last one in df_CLA) is not part of "Overall" - confirm that the
    # omission is intended.
    df_overall = df.iloc[:, [0, *range(2, 15)]]

    plot_df_dict = {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }
    return plot_df_dict
def tab_data():
    """Load the leaderboard sheet and build the per-category raw metric tables.

    Reads the first 18 data rows of columns A:BE from sheet "Sheet2" of
    'leaderboard.xlsx', replaces missing cells with "-" and slices the
    frame by column position.

    Returns:
        A dict mapping the category name ("Overall", "Basic Information
        Retrieval", "Legal Foundation Inference", "Complex Legal
        Application") to a DataFrame made of the first two columns plus
        that category's metric columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # fillna returns a new frame; keep it so NaN cells really become "-".
    leaderboard_df = leaderboard_df.fillna("-")

    # Columns 0-1 are shared by every table; the rest is split by position.
    df_BIR = leaderboard_df.iloc[:, [*range(0, 18)]]
    df_LFI = leaderboard_df.iloc[:, [*range(0, 2), *range(18, 42)]]
    df_CLA = leaderboard_df.iloc[:, [*range(0, 2), *range(42, 56)]]
    df_overall = leaderboard_df.iloc[:, [*range(0, 56)]]

    table_df_dict = {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }
    return table_df_dict
if __name__ == "__main__":
    # Script entry point: build the plot frames first, then the table frames.
    df1, df2 = plot_data(), tab_data()