import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository
HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_INFO = [
    "Model",
    "Avg",
    "GoEmotion",
    "BANKING77",
    "TecRED",
    "Few-NERD",
    "DialogRE",
    "Discovery"
]
DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
SUBMISSION_NAME = "LongICL_leaderboard_submission"
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
CSV_DIR = "./LongICL_leaderboard_submission/results.csv"
COLUMN_NAMES = MODEL_INFO
LEADERBORAD_INTRODUCTION = """# Long In-context Learning Leaderboard
**"Which large language model is the BEST on long in-context learning task?"**<br>
πŸ† Welcome to the **LongICL** leaderboard! The leaderboard covers long in-context learning evaluation for popular long large language model.
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
</div>
The evaluation sets from the following datasets are included in the leaderboard.
<table>
<tr>
<th><strong>Dataset</strong></th>
<th>Task Type</th>
<th>#Classes</th>
<th>#Tokens/Shot</th>
<th>#Total Tokens</th>
</tr>
<tr>
<td><strong>GoEmotion</strong></td>
<td>Emotion Classification</td>
<td>28</td>
<td>28</td>
<td>[1K, 4K]</td>
</tr>
<tr>
<td><strong>BANKING77</strong></td>
<td>Intent Classification</td>
<td>77</td>
<td>28</td>
<td>[2K, 11K]</td>
</tr>
<tr>
<td><strong>TecRED</strong></td>
<td>Relation Extraction</td>
<td>41</td>
<td>80</td>
<td>[4K, 18K]</td>
</tr>
<tr>
<td><strong>Few-NERD</strong></td>
<td>Entity Recognition</td>
<td>66</td>
<td>61</td>
<td>[5K, 23K]</td>
</tr>
<tr>
<td><strong>DialogRE</strong></td>
<td>Relation Extraction</td>
<td>36</td>
<td>226</td>
<td>[8K, 32K]</td>
</tr>
<tr>
<td><strong>Discovery</strong></td>
<td>Discourse Marker Classification</td>
<td>174</td>
<td>61</td>
<td>[10K, 50K]</td>
</tr>
</table>
**"How to evaluate your model and submit your results?"**<br>
Please refer to the guidelines on <a href="https://github.com/TIGER-AI-Lab/LongICLBench/blob/main/README.md">GitHub</a> to evaluate your own model.
"""
TABLE_INTRODUCTION = """
"""
LEADERBORAD_INFO = """
Information about the datasets used is listed below:<br>
GoEmotion<br>
<a href='https://aclanthology.org/2020.acl-main.372/'>Paper</a><br>
<a href='https://huggingface.co/datasets/go_emotions'>Data</a><br>
BANKING77<br>
<a href='https://arxiv.org/abs/2003.04807'>Paper</a><br>
<a href='https://huggingface.co/datasets/banking77'>Data</a><br>
TecRED<br>
<a href='https://aclanthology.org/D17-1004/'>Paper</a><br>
<a href='https://nlp.stanford.edu/projects/tacred/#usage'>Data</a><br>
Few-NERD<br>
<a href='https://aclanthology.org/2021.acl-long.248/'>Paper</a><br>
<a href='https://github.com/thunlp/Few-NERD?tab=readme-ov-file#get-the-data'>Data</a><br>
DialogRE<br>
<a href='https://aclanthology.org/2020.acl-main.444/'>Paper</a><br>
<a href='https://github.com/nlpdata/dialogre'>Data</a><br>
Discovery<br>
<a href='https://aclanthology.org/N19-1351/'>Paper</a><br>
<a href='https://huggingface.co/datasets/discovery'>Data</a>
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{Li2024LongcontextLS,
title={Long-context LLMs Struggle with Long In-context Learning},
author={Tianle Li and Ge Zhang and Quy Duc Do and Xiang Yue and Wenhu Chen},
journal={ArXiv},
year={2024},
volume={abs/2404.02060},
url={https://api.semanticscholar.org/CorpusID:268857023}
}"""
SUBMIT_INTRODUCTION = """# Submit to the LongICL Leaderboard
## ⚠ Please note that you need to submit a JSON file in the following format:
```json
{
    "Model": "[NAME]",
    "Repo": "https://huggingface.co/[MODEL_NAME]",
    "GoEmotion": 50,
    "BANKING77": 50,
    "TecRED": 50,
    "Few-NERD": 50,
    "DialogRE": 50,
    "Discovery": 50
}
```
After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
"""
def get_df():
    # Pull the latest results from the submission dataset repo and rebuild the leaderboard table.
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    # Average the six task scores, then sort models from best to worst.
    df['Avg'] = df[['GoEmotion', 'BANKING77', 'TecRED', 'Few-NERD', 'DialogRE', 'Discovery']].mean(axis=1).round(1)
    df = df.sort_values(by=['Avg'], ascending=False)
    return df[COLUMN_NAMES]
def add_new_eval(
    input_file,
):
    # Reject empty uploads before attempting to parse them.
    if input_file is None:
        return "Error! Empty file!"
    upload_data = json.loads(input_file)
    # Render the model name as a markdown link to its repo, followed by the six task scores.
    data_row = [
        f'[{upload_data["Model"]}]({upload_data["Repo"]})',
        upload_data['GoEmotion'],
        upload_data['BANKING77'],
        upload_data['TecRED'],
        upload_data['Few-NERD'],
        upload_data['DialogRE'],
        upload_data['Discovery'],
    ]
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    # Collect the model entries already present in the results CSV to avoid duplicates.
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            already_submitted.append(row[0])
    if data_row[0] not in already_submitted:
        # Append the new entry and push the updated CSV back to the submission repo.
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        print('Submission Successful')
    else:
        print('The entry already exists')
def refresh_data():
    # Re-pull the submission repo and return the freshly sorted leaderboard table.
    return get_df()
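
# A minimal sketch of how the helpers above could be wired into a Gradio Blocks UI when
# this module is run directly. The layout, component names, and tab titles below are
# illustrative assumptions, not necessarily the original Space's exact interface.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown(LEADERBORAD_INTRODUCTION)
        with gr.Tab("Leaderboard"):
            gr.Markdown(TABLE_INTRODUCTION)
            leaderboard_table = gr.Dataframe(
                value=get_df(), headers=COLUMN_NAMES, datatype=DATA_TITILE_TYPE, interactive=False
            )
            refresh_button = gr.Button("Refresh")
            refresh_button.click(fn=refresh_data, outputs=leaderboard_table)
        with gr.Tab("Submit"):
            gr.Markdown(SUBMIT_INTRODUCTION)
            upload_file = gr.File(label="Submission JSON", type="binary")
            submit_button = gr.Button("Submit Eval")
            submit_button.click(fn=add_new_eval, inputs=upload_file)
        with gr.Accordion("Citation", open=False):
            gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=8)
    demo.launch()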