import streamlit as st

import pandas as pd
import time
import json
import datasets

from huggingface_hub import HfApi
from huggingface_hub import hf_hub_download

from utils.upload_hub import upload_scores_to_hub, file_name_decode
from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.hub_info import get_model_size
from utils.filterable_dataframe import filterable_dataframe

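# Widen Streamlit's centered layout: override the block container's default
# max-width with a small CSS snippet.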
st.set_page_config(layout="centered")
st.markdown(
    """
    <style>
    .appview-container .main .block-container{
        max-width: 80%;
        padding: 50px;
    }
    </style>
    """,
    unsafe_allow_html=True
)

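# st.cache_data memoizes the returned local file path, so each gold-answer
# file is downloaded from the Hub only once per set of arguments.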
@st.cache_data
def download_gold_answer(repo, filename, token, force_download=False):
    return hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)


HUB_TOKEN = st.secrets['hf']
HUB_API = HfApi(token=HUB_TOKEN)

LEADERBOARD_DATASET_REPO = 'zhaorui-nb/leaderboard-score'
ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'

# Local paths of the gold-answer files, keyed by dataset setting name.
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN),
}

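# The assembled leaderboard table is kept in st.session_state so later reruns
# reuse it instead of reloading the dataset and re-querying model sizes.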
def get_leaderboard_df():
    with st.spinner('Loading leaderboard data...'):
        if st.session_state.get('leaderboard_df') is None:
            dataset = datasets.load_dataset(LEADERBOARD_DATASET_REPO)
            df = pd.DataFrame(dataset['train'])

            # Uploaded file names encode the model as 'Organization@Model';
            # restore the canonical 'Organization/Model' form for display.
            df['model name'] = df['model name'].str.replace('@', '/')

            df['model size'] = df['model name'].apply(lambda x: get_model_size(x, token=HUB_TOKEN))

            st.session_state['leaderboard_df'] = df
            return df
        else:
            return st.session_state['leaderboard_df']


st.title('De-identification Model Leaderboard')

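# Any error while loading or rendering the leaderboard is surfaced in the UI
# rather than crashing the app.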
try:
    with st.container():
        df = get_leaderboard_df()

        with st.sidebar:
            st.header("Leaderboard filter control panel")
            default_columns = [c for c in df.columns if c not in ['file name', 'submitter', 'MICRO precision', 'MICRO recall', 'MACRO precision', 'MACRO recall', 'detail result']]
            selected_columns = st.multiselect('Select columns to display', df.columns, default=default_columns)

            filtered_df = filterable_dataframe(df)

        st.write("Set the filters in the sidebar.")

        leaderboard_df = st.dataframe(filtered_df[selected_columns], selection_mode='multi-row', on_select='rerun', key='leaderboard')

        st.subheader("Detail Result")
        det_ind = st.session_state.leaderboard['selection']['rows']
        if len(det_ind) == 0:
            st.write('Please check the boxes in the Model Leaderboard to view the detailed results.')
        else:
            col_detail = st.columns(len(det_ind))
            for i, dind in enumerate(det_ind):
                with col_detail[i]:
                    # The selection returns row positions of the *displayed*
                    # dataframe, so index into filtered_df rather than df.
                    row = filtered_df.iloc[dind]
                    dis = f"{row['model name']}___{row['dataset']}___{row['method']}"
                    color = [st.success, st.info, st.warning, st.error]
                    color[i % 4](dis)

                    dic = json.loads(row['detail result'])
                    dt_df = pd.DataFrame(dic).T
                    st.dataframe(dt_df)

except Exception as e:
    st.error(f"Error: {e}")

st.markdown("---")

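# Evaluation section: upload an answer file, score it against the gold
# answers, and optionally submit the result to the leaderboard.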
model_name_input = ''
dataset_input = ''
method_input = ''
file_name = ''
submitter_input = ''

if 'score_json' not in st.session_state:
    st.session_state['score_json'] = None

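# file_name_decode is a project helper; from its usage below it is assumed to
# return a dict with 'model_name', 'dataset', 'method' and 'file_name' keys,
# or a falsy value when the name does not match the supported pattern.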
@st.cache_data()
def get_file_info(uploaded_file):
    filename_info = file_name_decode(uploaded_file.name)
    return filename_info

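# Note: caching on the UploadedFile argument assumes Streamlit can hash it;
# if that ever fails, key the cache on uploaded_file.name or the file bytes.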
@st.cache_data()
def eval_answer_txt(set_name, uploaded_file):
    print(f"eval_answer_txt @ {time.time()}: {set_name}")

    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file)  # avoid shadowing the eval() builtin
    score_json = evaluator.eval()
    return score_json


def clear_score_json():
    st.session_state['score_json'] = None

st.title("Model Evaluation")
st.write("Supported file naming: [{Organization@Model}][{Dataset}][{Method}]{Filename}.txt")

col_upload = st.columns([3, 1])
with col_upload[0]:
    uploaded_file = st.file_uploader("Please upload the answer.txt file", type=["txt"], key="uploaded_file", on_change=clear_score_json)
with col_upload[1]:
    if not uploaded_file:
        st.warning("Please upload a file.")
        st.session_state['score_json'] = None
    else:
        st.success("File uploaded successfully.")

        # Pre-fill the metadata fields from the file name when it follows
        # the supported naming scheme.
        filename_info = get_file_info(uploaded_file)
        if filename_info:
            model_name_input = filename_info['model_name']
            dataset_input = filename_info['dataset']
            method_input = filename_info['method']
            file_name = filename_info['file_name']

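# Layout: evaluation scores render on the left (col_score[0]); the metadata
# form plus calculate/submit buttons sit on the right (col_score[1]).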
col_score = st.columns([7, 5])
if uploaded_file:
    with col_score[1], st.container(border=True):
        model_name_input = st.text_input("model name", model_name_input)
        dataset_input = st.text_input("dataset", dataset_input)
        method_input = st.text_input("method", method_input)
        file_name = st.text_input("file name", file_name)
        submitter_input = st.text_input("submitter", submitter_input)
        check_all_fill_in = model_name_input and dataset_input and method_input and file_name and submitter_input

        col_submit_and_recalculate = st.columns(2)
        with col_submit_and_recalculate[0]:
            calculate_btn = st.button("calculate", type='secondary', use_container_width=True)
        with col_submit_and_recalculate[1]:
            submit_btn = st.button("SUBMIT", type='primary', use_container_width=True, disabled=not check_all_fill_in)

    # Score on first load of a file, and re-score on demand.
    if calculate_btn or st.session_state['score_json'] is None:
        set_name = dataset_input
        st.session_state['score_json'] = eval_answer_txt(set_name, uploaded_file)
        if st.session_state['score_json']:
            st.success("Evaluation succeeded.")
        else:
            st.error("Evaluation failed. Please check the file content or set the correct dataset name.")

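    # Per-tag scores are sortable; the MICRO/MACRO average rows stay pinned
    # at the bottom of the table.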
    if st.session_state['score_json']:
        with col_score[0], st.container(border=True):
            df = pd.DataFrame(st.session_state['score_json']).T

            tag_df = df.drop(["MICRO_AVERAGE", "MACRO_AVERAGE"], axis=0)
            avg_df = df.loc[["MICRO_AVERAGE", "MACRO_AVERAGE"]]

            col_sort_func = st.columns(2)
            with col_sort_func[0]:
                sorted_column = st.selectbox("Select sort column", df.columns)
            with col_sort_func[1]:
                ascending = st.radio("Sort Order", ["Ascending", "Descending"])

            tag_df = tag_df.sort_values(by=sorted_column, ascending=(ascending == "Ascending"))

            st.dataframe(pd.concat([tag_df, avg_df]), use_container_width=True)

    if not check_all_fill_in:
        st.warning("Please fill in all fields.")

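    # Submission: flatten the averaged scores plus metadata into one
    # leaderboard row and upload it to the scores dataset on the Hub.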
    if submit_btn:
        if st.session_state['score_json']:
            score_json = st.session_state['score_json']

            leaderboard_dict = {
                "model name": model_name_input,
                "dataset": dataset_input,
                "method": method_input,
                "file name": file_name,
                "submitter": submitter_input,

                "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
                "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
                "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
                "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
                "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
                "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],

                "detail result": json.dumps(score_json, indent=4),
            }

            repo_file_path = f'data/train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
            upload_res = upload_scores_to_hub(HUB_API, leaderboard_dict, repo_file_path, hub_repo=LEADERBOARD_DATASET_REPO)
            if upload_res:
                st.success("Submission succeeded.")
                st.success(f"Your score is available here: {upload_res}")
            else:
                st.error("Submission failed.")