import streamlit as st
import pandas as pd
import os
import time
import json
import tqdm
import datasets
from huggingface_hub import HfApi
from huggingface_hub import hf_hub_download

from utils.upload_hub import upload_scores_to_hub, file_name_decode
from utils.Evaluation_answer_txt import Evaluation_answer_txt
from utils.hub_info import get_model_size
from utils.filterable_dataframe import filterable_dataframe

# st.set_page_config(layout="wide")
st.set_page_config(layout="centered")

st.markdown(
    f"""
    """,
    unsafe_allow_html=True
)


@st.cache_data
def download_gold_answer(repo, filename, token, force_download=False):
    # download the gold-answer file from the Hugging Face Hub and return its local path
    ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
    return ret


HUB_TOKEN = st.secrets['hf']
HUB_API = HfApi(token=HUB_TOKEN)

LEADERBOARD_DATASET_REPO = 'zhaorui-nb/leaderboard-score'

# Setting1 Setting2 Setting3
ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
GET_GOLD_ANSWER_PATH = {
    'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
    'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
    'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
}


# cache the leaderboard dataframe in the session state
def get_leaderboard_df():
    with st.spinner('Loading leaderboard data...'):
        if st.session_state.get('leaderboard_df') is None:
            dataset = datasets.load_dataset(LEADERBOARD_DATASET_REPO)
            df = pd.DataFrame(dataset['train'])
            # replace '@' with '/' in the model name column
            df['model name'] = df['model name'].str.replace('@', '/')
            # add model size
            df['model size'] = df['model name'].apply(lambda x: get_model_size(x, token=HUB_TOKEN))
            st.session_state['leaderboard_df'] = df
            return df
        else:
            return st.session_state['leaderboard_df']


st.title('De-identification Model Leaderboard')

try:
    with st.container():
        # columns: ['model name', 'dataset', 'method', 'file name', 'submitter', 'MICRO precision', 'MICRO recall', 'MICRO f1', 'MACRO precision', 'MACRO recall', 'MACRO f1', 'detail result']
        df = get_leaderboard_df()

        with st.sidebar:  # st.expander("Leaderboard", expanded=True):
            st.header("Leaderboard filter control panel")
            default_columns = [c for c in df.columns if c not in ['file name', 'submitter', 'MICRO precision', 'MICRO recall', 'MACRO precision', 'MACRO recall', 'detail result']]
            selected_columns = st.multiselect('Select columns to display', df.columns, default=default_columns)
            # add filterable dataframe
            filtered_df = filterable_dataframe(df)

        # hint that the user can filter the leaderboard from the sidebar
        st.write("Set the filters in the sidebar.")
        leaderboard_df = st.dataframe(filtered_df[selected_columns], selection_mode='multi-row', on_select='rerun', key='leaderboard')

        st.subheader("Detail Result")
        det_ind = st.session_state.leaderboard['selection']['rows']
        if len(det_ind) == 0:
            st.write('Please check the boxes in the Model Leaderboard to view the detailed results.')
        else:
            col_detail = st.columns(len(det_ind))
            for i, dind in enumerate(det_ind):
                with col_detail[i]:
                    dis = f"{df.iloc[dind]['model name']}___{df.iloc[dind]['dataset']}___{df.iloc[dind]['method']}"
                    # cycle through the four colored message boxes for the selected rows
                    color = [st.success, st.info, st.warning, st.error]
                    color[i % 4](dis)

                    dic = json.loads(df.iloc[dind]['detail result'])
                    dt_df = pd.DataFrame(dic).T
                    st.dataframe(dt_df)

except Exception as e:
    st.error(f"Error: {e}")

st.markdown("---")
# ############################################################################################################
# ############################################### Evaluation_answer_txt
# ############################################################################################################

model_name_input = ''
dataset_input = ''
method_input = ''
file_name = ''
submitter_input = ''

if 'score_json' not in st.session_state:
    st.session_state['score_json'] = None


@st.cache_data()
def get_file_info(uploaded_file):
    # parse model name, dataset, method and file name from the uploaded file's name
    filename_info = file_name_decode(uploaded_file.name)
    return filename_info


@st.cache_data()
def eval_answer_txt(set_name, uploaded_file):
    print(f"eval_answer_txt: {time.time()}", set_name)
    if set_name not in GET_GOLD_ANSWER_PATH:
        return None
    gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
    evaluator = Evaluation_answer_txt(gold_answer_txt, uploaded_file)  # renamed from `eval` to avoid shadowing the built-in
    score_json = evaluator.eval()
    return score_json


def clear_score_json():
    st.session_state['score_json'] = None


st.title("Model Evaluation")
st.write("Supported file naming: [{Organization@Model}][{Dataset}][{Method}]{Filename}.txt")

col_upload = st.columns([3, 1])
with col_upload[0]:
    uploaded_file = st.file_uploader("Please upload the answer.txt file", type=["txt"], key="uploaded_file", on_change=clear_score_json)
with col_upload[1]:
    if not uploaded_file:
        st.warning("Please upload a file.")
        st.session_state['score_json'] = None
    else:
        st.success("File uploaded successfully.")
        filename_info = get_file_info(uploaded_file)
        if filename_info:
            model_name_input = filename_info['model_name']
            dataset_input = filename_info['dataset']
            method_input = filename_info['method']
            file_name = filename_info['file_name']

col_score = st.columns([7, 5])
if uploaded_file:
    with col_score[1], st.container(border=True):
        model_name_input = st.text_input("model name", model_name_input)
        dataset_input = st.text_input("dataset", dataset_input)
        method_input = st.text_input("method", method_input)
        file_name = st.text_input("file name", file_name)
        submitter_input = st.text_input("submitter", submitter_input)
        check_all_fill_in = model_name_input and dataset_input and method_input and file_name and submitter_input

        col_submit_and_recalculate = st.columns(2)
        with col_submit_and_recalculate[0]:
            calculate_btn = st.button("calculate", type='secondary', use_container_width=True)
        with col_submit_and_recalculate[1]:
            submit_btn = st.button("SUBMIT", type='primary', use_container_width=True, disabled=not check_all_fill_in)

        if calculate_btn or st.session_state['score_json'] is None:
            set_name = dataset_input
            st.session_state['score_json'] = eval_answer_txt(set_name, uploaded_file)
            if st.session_state['score_json']:
                st.success("Evaluation succeeded.")
            else:
                st.error("Evaluation failed. Please check the file content or set the correct dataset name.")

    if st.session_state['score_json']:
        with col_score[0], st.container(border=True):
            df = pd.DataFrame(st.session_state['score_json']).T
            # split the MICRO_AVERAGE and MACRO_AVERAGE rows into a separate dataframe
            tag_df = df.drop(["MICRO_AVERAGE", "MACRO_AVERAGE"], axis=0)
            avg_df = df.loc[["MICRO_AVERAGE", "MACRO_AVERAGE"]]

            col_sort_func = st.columns(2)
            with col_sort_func[0]:
                sorted_column = st.selectbox("Select column to sort by", df.columns)
            with col_sort_func[1]:
                ascending = st.radio("Sort Order", ["Ascending", "Descending"])
            tag_df = tag_df.sort_values(by=sorted_column, ascending=ascending == "Ascending")

            st.dataframe(pd.concat([tag_df, avg_df]), use_container_width=True)

    if not check_all_fill_in:
        st.warning("Please fill in the complete information.")

    if submit_btn:
        if st.session_state['score_json']:
            score_json = st.session_state['score_json']
            leaderboard_dict = {
model_name_input, "dataset": dataset_input, "method": method_input, "file name": file_name, "submitter": submitter_input, "MICRO precision": score_json["MICRO_AVERAGE"]["precision"], "MICRO recall": score_json["MICRO_AVERAGE"]["recall"], "MICRO f1": score_json["MICRO_AVERAGE"]["f1"], "MACRO precision": score_json["MACRO_AVERAGE"]["precision"], "MACRO recall": score_json["MACRO_AVERAGE"]["recall"], "MACRO f1": score_json["MACRO_AVERAGE"]["f1"], "detail result": json.dumps(score_json,indent=4) #score_json } repo_file_path = f'data/train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json' upload_res = upload_scores_to_hub(HUB_API, leaderboard_dict, repo_file_path, hub_repo=LEADERBOARD_DATASET_REPO) if upload_res: st.success(f"submit success") st.success(f"your score at here: {upload_res}") else: st.error("submit failed")