Spaces:

TIGER-Lab
/

LongICL-Leaderboard

Running

App Files Files Community

LongICL-Leaderboard / utils.py

wenhu

Update utils.py

29e7c67 verified 2 months ago

raw

history blame contribute delete

No virus

5.99 kB

	import pandas as pd
	import gradio as gr
	import csv
	import json
	import os
	import shutil
	from huggingface_hub import Repository

	# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# Leaderboard columns in display order: the model name, the average score,
	# then one score column per dataset.
	MODEL_INFO = [
	    "Model",
	    "Avg",
	    "GoEmotion",
	    "BANKING77",
	    "TecRED",
	    "Few-NERD",
	    "DialogRE",
	    "Discovery",
	]

	# One type tag per MODEL_INFO column: 'markdown' for the first column and
	# 'number' for every remaining one. Derived from MODEL_INFO so the two lists
	# cannot drift apart in length when a column is added or removed.
	DATA_TITILE_TYPE = ['markdown'] + ['number'] * (len(MODEL_INFO) - 1)

	SUBMISSION_NAME = "LongICL_leaderboard_submission"
	# Built by string concatenation rather than os.path.join: this is a URL, not a
	# filesystem path, so it must not depend on the platform's path-joining rules.
	SUBMISSION_URL = "https://huggingface.co/datasets/TIGER-Lab/" + SUBMISSION_NAME
	# Results file inside the local directory named after SUBMISSION_NAME.
	CSV_DIR = f"./{SUBMISSION_NAME}/results.csv"

	# Alias of MODEL_INFO (the same list object, not a copy).
	COLUMN_NAMES = MODEL_INFO

	# Markdown/HTML introduction text for the leaderboard: a heading, a welcome
	# blurb, an HTML table describing the six datasets (task type, number of
	# classes, tokens per shot, total token range) and a link to the evaluation
	# guideline on GitHub.
	LEADERBORAD_INTRODUCTION = """# Long In-context Learning Leaderboard

	"Which large language model is the BEST on long in-context learning task?"<br>
	🏆 Welcome to the LongICL leaderboard! The leaderboard covers long in-context learning evaluation for popular long large language model.
	<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
	</div>
	The evaluation set from the following datasets are being included in the leaderboard.
	<table>
	<tr>
	<th><strong>Dataset</strong></th>
	<th>Task Type</th>
	<th>#Classes</th>
	<th>#Tokens/Shot</th>
	<th>#Total Tokens</th>
	</tr>
	<tr>
	<td><strong>GoEmotion</strong></td>
	<td>Emotion Classification</td>
	<td>28</td>
	<td>28</td>
	<td>[1K, 4K]</td>
	</tr>
	<tr>
	<td><strong>BANKING77</strong></td>
	<td>Intent Classification</td>
	<td>77</td>
	<td>28</td>
	<td>[2K, 11K]</td>
	</tr>
	<tr>
	<td><strong>TecRED</strong></td>
	<td>Relation Extraction</td>
	<td>41</td>
	<td>80</td>
	<td>[4K, 18K]</td>
	</tr>
	<tr>
	<td><strong>Few-NERD</strong></td>
	<td>Entity Recognition</td>
	<td>66</td>
	<td>61</td>
	<td>[5K, 23K]</td>
	</tr>
	<tr>
	<td><strong>DialogRE</strong></td>
	<td>Relation Extraction</td>
	<td>36</td>
	<td>226</td>
	<td>[8K, 32K]</td>
	</tr>
	<tr>
	<td><strong>Discovery</strong></td>
	<td>Discourse Marker Classification</td>
	<td>174</td>
	<td>61</td>
	<td>[10K, 50K]</td>
	</tr>
	</table>

	"How to evaluate your model and submit your results?"<br>
	Please refer to the guideline in <a href="https://github.com/TIGER-AI-Lab/LongICLBench/blob/main/README.md">Github</a> to evaluate your own model.

	"""

	# Introductory text for the results table; currently contains only whitespace.
	TABLE_INTRODUCTION = """
	"""

	# HTML snippet listing, for each of the six datasets, a link to its paper and
	# a link to where its data can be obtained.
	LEADERBORAD_INFO = """
	We list the information of the used datasets as follows:<br>

	GoEmotion<br>
	<a href='https://aclanthology.org/2020.acl-main.372/'>Paper</a><br>
	<a href='https://huggingface.co/datasets/go_emotions'>Data</a><br>

	BANKING77<br>
	<a href='https://arxiv.org/abs/2003.04807'>Paper</a><br>
	<a href='https://huggingface.co/datasets/banking77'>Data</a><br>

	TecRED<br>
	<a href='https://aclanthology.org/D17-1004/'>Paper</a><br>
	<a href='https://nlp.stanford.edu/projects/tacred/#usage'>Data</a><br>

	Few-NERD<br>
	<a href='https://aclanthology.org/2021.acl-long.248/'>Paper</a><br>
	<a href='https://github.com/thunlp/Few-NERD?tab=readme-ov-file#get-the-data'>Data</a>

	DialogRE<br>
	<a href='https://aclanthology.org/2020.acl-main.444/'>Paper</a><br>
	<a href='https://github.com/nlpdata/dialogre'>Data</a>

	Discovery<br>
	<a href='https://aclanthology.org/N19-1351/'>Paper</a><br>
	<a href='https://huggingface.co/datasets/discovery'>Data</a>
	"""

	# Label text inviting the reader to copy the citation snippet.
	CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
	# BibTeX entry for the paper; a raw string so the text is taken literally,
	# with no backslash escape processing.
	CITATION_BUTTON_TEXT = r"""@article{Li2024LongcontextLS,
	title={Long-context LLMs Struggle with Long In-context Learning},
	author={Tianle Li and Ge Zhang and Quy Duc Do and Xiang Yue and Wenhu Chen},
	journal={ArXiv},
	year={2024},
	volume={abs/2404.02060},
	url={https://api.semanticscholar.org/CorpusID:268857023}
	}"""

	# Markdown instructions for submitting results. The fenced JSON block is a
	# template users copy, so it must itself be valid JSON: the comma after the
	# "Repo" entry is required, otherwise a file based on the template cannot be
	# parsed by json.loads.
	SUBMIT_INTRODUCTION = """# Submit on LongICL Leaderboard Introduction

	## ⚠ Please note that you need to submit the json file with following format (Only include the highest score among 1/2/3/4/5 rounds for each dataset):

	```json
	{
	"Model": "[NAME]",
	"Repo": "https://huggingface.co/[MODEL_NAME]",
	"GoEmotion": 50,
	"BANKING77": 50,
	"TecRED": 50,
	"Few-NERD": 50,
	"DialogRE": 50,
	"Discovery": 50
	}
	```
	After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds).

	"""
	def get_df():
	    """Load the leaderboard table from the submission repository.

	    Pulls the latest state of the submission repository into the local
	    directory, reads the results CSV, computes the ``Avg`` column as the
	    row-wise mean of the six dataset scores (rounded to one decimal) and
	    sorts the rows by ``Avg`` in descending order.

	    Returns:
	        A DataFrame restricted to ``COLUMN_NAMES``, in that column order.
	    """
	    # repo_type is passed explicitly so this call opens the repository the
	    # same way add_new_eval does, instead of relying on the type being
	    # inferred from the URL.
	    # NOTE(review): Repository is deprecated in recent huggingface_hub
	    # releases -- confirm the pinned version still provides it.
	    repo = Repository(
	        local_dir=SUBMISSION_NAME,
	        clone_from=SUBMISSION_URL,
	        use_auth_token=HF_TOKEN,
	        repo_type="dataset",
	    )
	    repo.git_pull()

	    df = pd.read_csv(CSV_DIR)
	    score_columns = ['GoEmotion', 'BANKING77', 'TecRED', 'Few-NERD', 'DialogRE', 'Discovery']
	    df['Avg'] = df[score_columns].mean(axis=1).round(1)
	    df = df.sort_values(by=['Avg'], ascending=False)
	    return df[COLUMN_NAMES]

	# Dataset score fields, in the order they are written to the results CSV.
	_SCORE_FIELDS = ("GoEmotion", "BANKING77", "TecRED", "Few-NERD", "DialogRE", "Discovery")


	def _build_submission_row(upload_data):
	    """Return the CSV row for a parsed submission: a markdown model link, then the scores."""
	    # NOTE(review): "Model" and "Repo" come straight from the uploaded file and
	    # are interpolated unescaped into markdown -- consider sanitizing them.
	    model_link = f'[{upload_data["Model"]}]({upload_data["Repo"]})'
	    return [model_link, *(upload_data[field] for field in _SCORE_FIELDS)]


	def add_new_eval(
	    input_file,
	):
	    """Append an uploaded submission to the results CSV and push it to the Hub.

	    Args:
	        input_file: JSON content of the submission (anything ``json.loads``
	            accepts), or ``None`` when nothing was uploaded. The JSON object
	            must contain "Model", "Repo" and the six dataset score fields.

	    Returns:
	        An ``"Error! ..."`` message string when the input is missing, is not
	        valid JSON, is not a JSON object, or lacks required fields.
	        Otherwise ``None``; the outcome ('Submission Successful' or
	        'The entry already exists') is printed.
	    """
	    if input_file is None:
	        return "Error! Empty file!"

	    # Validate before touching the repository so a bad upload is reported in
	    # the same way as an empty one instead of raising.
	    try:
	        upload_data = json.loads(input_file)
	    except ValueError:
	        # Covers json.JSONDecodeError and undecodable bytes (UnicodeDecodeError).
	        return "Error! The uploaded file is not valid JSON!"
	    if not isinstance(upload_data, dict):
	        return "Error! The submission must be a JSON object!"
	    missing = [field for field in ("Model", "Repo", *_SCORE_FIELDS) if field not in upload_data]
	    if missing:
	        return f"Error! Missing field(s): {', '.join(missing)}"
	    data_row = _build_submission_row(upload_data)

	    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
	    submission_repo.git_pull()

	    # newline='' is what the csv module expects for files it reads or writes;
	    # UTF-8 matches the default encoding pandas uses when reading this file.
	    # Blank rows are skipped so row[0] cannot raise IndexError.
	    with open(CSV_DIR, mode='r', newline='', encoding='utf-8') as file:
	        already_submitted = {row[0] for row in csv.reader(file, delimiter=',') if row}

	    if data_row[0] in already_submitted:
	        print('The entry already exists')
	        return None

	    with open(CSV_DIR, mode='a', newline='', encoding='utf-8') as file:
	        csv.writer(file).writerow(data_row)

	    submission_repo.push_to_hub()
	    print('Submission Successful')
	    return None


	def refresh_data():
	    """Return a freshly loaded leaderboard table by delegating to get_df()."""
	    refreshed = get_df()
	    return refreshed