import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil

from huggingface_hub import Repository

# Hub token for authenticated clone/pull/push; None when unset (read-only use).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Leaderboard column headers, in display order.
MODEL_INFO = [
    "Model", "Avg",
    "GoEmotion", "BANKING77", "TecRED", "Few-NERD", "DialogRE", "Discovery"
]

# Gradio Dataframe datatype for each column in MODEL_INFO (same order).
# NOTE(review): the "TITILE" typo is kept — the name is part of the module's
# public surface and may be referenced elsewhere in the app.
DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number']

SUBMISSION_NAME = "LongICL_leaderboard_submission"
# Plain concatenation, not os.path.join: this is a URL, and os.path.join
# would produce backslashes on Windows.
SUBMISSION_URL = "https://huggingface.co/datasets/TIGER-Lab/" + SUBMISSION_NAME
# Path of the results CSV inside the local clone of the submission repo.
CSV_DIR = "./LongICL_leaderboard_submission/results.csv"

COLUMN_NAMES = MODEL_INFO

LEADERBORAD_INTRODUCTION = """# Long In-context Learning Leaderboard **"Which large language model is the BEST on long in-context learning task?"**
🏆 Welcome to the **LongICL** leaderboard! The leaderboard covers long in-context learning evaluation for popular long large language model.
The evaluation set from the following datasets are being included in the leaderboard.
Dataset Task Type #Classes #Tokens/Shot #Total Tokens
GoEmotion Emotion Classification 28 28 [1K, 4K]
BANKING77 Intent Classification 77 28 [2K, 11K]
TecRED Relation Extraction 41 80 [4K, 18K]
Few-NERD Entity Recognition 66 61 [5K, 23K]
DialogRE Relation Extraction 36 226 [8K, 32K]
Discovery Discourse Marker Classification 174 61 [10K, 50K]
**"How to evaluate your model and submit your results?"**
Please refer to the guideline in Github to evaluate your own model. """

TABLE_INTRODUCTION = """ """

LEADERBORAD_INFO = """ We list the information of the used datasets as follows:
GoEmotion
Paper
Data
BANKING77
Paper
Data
TecRED
Paper
Data
Few-NERD
Paper
Data DialogRE
Paper
Data Discovery
Paper
Data """

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{Li2024LongcontextLS, title={Long-context LLMs Struggle with Long In-context Learning}, author={Tianle Li and Ge Zhang and Quy Duc Do and Xiang Yue and Wenhu Chen}, journal={ArXiv}, year={2024}, volume={abs/2404.02060}, url={https://api.semanticscholar.org/CorpusID:268857023} }"""

# Fix vs. original: the JSON template was invalid (missing comma after the
# "Repo" entry), so users copying it verbatim got a parse error on submit.
SUBMIT_INTRODUCTION = """# Submit on LongICL Leaderboard Introduction ## ⚠ Please note that you need to submit the json file with following format (Only include the highest score among 1/2/3/4/5 rounds for each dataset): ```json { "Model": "[NAME]", "Repo": "https://huggingface.co/[MODEL_NAME]", "GoEmotion": 50, "BANKING77": 50, "TecRED": 50, "Few-NERD": 50, "DialogRE": 50, "Discovery": 50 } ``` After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds). """


def get_df():
    """Pull the latest results from the submission dataset repo and build
    the leaderboard table.

    Returns a DataFrame with COLUMN_NAMES columns, with 'Avg' computed as
    the per-row mean of the six dataset scores (rounded to 1 decimal) and
    rows sorted by 'Avg' descending.
    """
    # repo_type="dataset" matches add_new_eval below — SUBMISSION_URL points
    # at a dataset repo, and the original call omitted it inconsistently.
    repo = Repository(local_dir=SUBMISSION_NAME,
                      clone_from=SUBMISSION_URL,
                      use_auth_token=HF_TOKEN,
                      repo_type="dataset")
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    df['Avg'] = df[['GoEmotion', 'BANKING77', 'TecRED', 'Few-NERD', 'DialogRE', 'Discovery']].mean(axis=1).round(1)
    df = df.sort_values(by=['Avg'], ascending=False)
    return df[COLUMN_NAMES]


def add_new_eval(
    input_file,
):
    """Append an uploaded JSON result to the results CSV and push it.

    Parameters
    ----------
    input_file :
        Payload from the gradio file widget; json.loads is called on it
        directly, so it is assumed to be the raw JSON bytes/str — TODO
        confirm the gr.File component uses type="binary".

    Returns
    -------
    str
        A status message (also printed, preserving the original behavior).
    """
    if input_file is None:
        return "Error! Empty file!"
    upload_data = json.loads(input_file)
    # First cell is a markdown link: [model name](repo url).
    data_row = [
        f'[{upload_data["Model"]}]({upload_data["Repo"]})',
        upload_data['GoEmotion'],
        upload_data['BANKING77'],
        upload_data['TecRED'],
        upload_data['Few-NERD'],
        upload_data['DialogRE'],
        upload_data['Discovery'],
    ]
    submission_repo = Repository(local_dir=SUBMISSION_NAME,
                                 clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN,
                                 repo_type="dataset")
    submission_repo.git_pull()

    # Collect the "Model" column of every existing row to de-duplicate.
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            # csv.reader yields [] for blank lines; row[0] would IndexError.
            if row:
                already_submitted.append(row[0])

    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        print('Submission Successful')
        return 'Submission Successful'
    else:
        print('The entry already exists')
        return 'The entry already exists'


def refresh_data():
    """Re-fetch and rebuild the leaderboard (backs the "Refresh" button)."""
    return get_df()