import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository

HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_INFO = [
    "Model",
    "Avg",
    "GoEmotion",
    "BANKING77",
    "TecRED",
    "Few-NERD",
    "DialogRE",
    "Discovery"
]
DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number']

SUBMISSION_NAME = "LongICL_leaderboard_submission"
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
CSV_DIR = "./LongICL_leaderboard_submission/results.csv"

COLUMN_NAMES = MODEL_INFO
LEADERBORAD_INTRODUCTION = """# Long In-context Learning Leaderboard | |
**"Which large language model is the BEST on long in-context learning task?"**<br> | |
π Welcome to the **LongICL** leaderboard! The leaderboard covers long in-context learning evaluation for popular long large language model. | |
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;"> | |
</div> | |
The evaluation set from the following datasets are being included in the leaderboard. | |
<table>
    <tr>
        <th><strong>Dataset</strong></th>
        <th>Task Type</th>
        <th>#Classes</th>
        <th>#Tokens/Shot</th>
        <th>#Total Tokens</th>
    </tr>
    <tr>
        <td><strong>GoEmotion</strong></td>
        <td>Emotion Classification</td>
        <td>28</td>
        <td>28</td>
        <td>[1K, 4K]</td>
    </tr>
    <tr>
        <td><strong>BANKING77</strong></td>
        <td>Intent Classification</td>
        <td>77</td>
        <td>28</td>
        <td>[2K, 11K]</td>
    </tr>
    <tr>
        <td><strong>TecRED</strong></td>
        <td>Relation Extraction</td>
        <td>41</td>
        <td>80</td>
        <td>[4K, 18K]</td>
    </tr>
    <tr>
        <td><strong>Few-NERD</strong></td>
        <td>Entity Recognition</td>
        <td>66</td>
        <td>61</td>
        <td>[5K, 23K]</td>
    </tr>
    <tr>
        <td><strong>DialogRE</strong></td>
        <td>Relation Extraction</td>
        <td>36</td>
        <td>226</td>
        <td>[8K, 32K]</td>
    </tr>
    <tr>
        <td><strong>Discovery</strong></td>
        <td>Discourse Marker Classification</td>
        <td>174</td>
        <td>61</td>
        <td>[10K, 50K]</td>
    </tr>
</table>
**"How to evaluate your model and submit your results?"**<br> | |
Please refer to the guideline in <a href="https://github.com/TIGER-AI-Lab/LongICLBench/blob/main/README.md">Github</a> to evaluate your own model. | |
""" | |
TABLE_INTRODUCTION = """ | |
""" | |
LEADERBORAD_INFO = """ | |
We list the information of the used datasets as follows:<br> | |
GoEmotion<br> | |
<a href='https://aclanthology.org/2020.acl-main.372/'>Paper</a><br> | |
<a href='https://huggingface.co/datasets/go_emotions'>Data</a><br> | |
BANKING77<br> | |
<a href='https://arxiv.org/abs/2003.04807'>Paper</a><br> | |
<a href='https://huggingface.co/datasets/banking77'>Data</a><br> | |
TecRED<br> | |
<a href='https://aclanthology.org/D17-1004/'>Paper</a><br> | |
<a href='https://nlp.stanford.edu/projects/tacred/#usage'>Data</a><br> | |
Few-NERD<br> | |
<a href='https://aclanthology.org/2021.acl-long.248/'>Paper</a><br> | |
<a href='https://github.com/thunlp/Few-NERD?tab=readme-ov-file#get-the-data'>Data</a> | |
DialogRE<br> | |
<a href='https://aclanthology.org/2020.acl-main.444/'>Paper</a><br> | |
<a href='https://github.com/nlpdata/dialogre'>Data</a> | |
Discovery<br> | |
<a href='https://aclanthology.org/N19-1351/'>Paper</a><br> | |
<a href='https://huggingface.co/datasets/discovery'>Data</a> | |
""" | |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{Li2024LongcontextLS,
  title={Long-context LLMs Struggle with Long In-context Learning},
  author={Tianle Li and Ge Zhang and Quy Duc Do and Xiang Yue and Wenhu Chen},
  journal={ArXiv},
  year={2024},
  volume={abs/2404.02060},
  url={https://api.semanticscholar.org/CorpusID:268857023}
}"""
SUBMIT_INTRODUCTION = """# Submit on LongICL Leaderboard Introduction | |
## β Please note that you need to submit the json file with following format: | |
```json | |
{ | |
"Model": "[NAME]", | |
"Repo": "https://huggingface.co/[MODEL_NAME]" | |
"GoEmotion": 50, | |
"BANKING77": 50, | |
"TecRED": 50, | |
"Few-NERD": 50, | |
"DialogRE": 50, | |
"Discovery": 50 | |
} | |
``` | |
After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds). | |
""" | |
def get_df():
    # Pull the latest submissions from the Hugging Face dataset repo and rebuild the leaderboard table.
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    df['Avg'] = df[['GoEmotion', 'BANKING77', 'TecRED', 'Few-NERD', 'DialogRE', 'Discovery']].mean(axis=1).round(1)
    df = df.sort_values(by=['Avg'], ascending=False)
    return df[COLUMN_NAMES]
def add_new_eval(
    input_file,
):
    # Gradio callback: parse an uploaded JSON submission and append it to the results CSV.
    if input_file is None:
        return "Error! Empty file!"
    upload_data = json.loads(input_file)
    data_row = [
        f'[{upload_data["Model"]}]({upload_data["Repo"]})',
        upload_data['GoEmotion'],
        upload_data['BANKING77'],
        upload_data['TecRED'],
        upload_data['Few-NERD'],
        upload_data['DialogRE'],
        upload_data['Discovery'],
    ]
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()

    # Skip entries whose model link already appears on the leaderboard.
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            already_submitted.append(row[0])
    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        return 'Submission Successful'
    else:
        return 'The entry already exists'
def refresh_data():
    return get_df()