Spaces:

TIGER-Lab
/

LongICL-Leaderboard

Sleeping

File size: 5,915 Bytes

a4535f6

import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository

# Hugging Face access token read from the environment; None when HF_TOKEN is
# not set. It is passed to `Repository(...)` by the functions below.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Column names of the leaderboard table, in the order `get_df` returns them.
MODEL_INFO = [
    "Model",
    "Avg",
    "GoEmotion",
    "BANKING77",
    "TecRED",
    "Few-NERD",
    "DialogRE",
    "Discovery",
]

# One entry per column of MODEL_INFO (8 in total): the first column holds a
# markdown link, the rest are numeric.
# NOTE(review): presumably consumed as the datatype list of the UI table --
# confirm against the caller. The misspelled name is kept for importers.
DATA_TITILE_TYPE = [
    'markdown',
    'number',
    'number',
    'number',
    'number',
    'number',
    'number',
    'number',
]

SUBMISSION_NAME = "LongICL_leaderboard_submission"
# Build the URL with string formatting rather than os.path.join: the latter is
# meant for filesystem paths and its separator handling is platform-specific.
SUBMISSION_URL = f"https://huggingface.co/datasets/TIGER-Lab/{SUBMISSION_NAME}"
# Path of the results CSV inside the local clone of the submission repo.
CSV_DIR = "./LongICL_leaderboard_submission/results.csv"

# Alias (same list object) used when selecting the columns to display.
COLUMN_NAMES = MODEL_INFO

# Markdown/HTML introduction text for the leaderboard: a title, a short
# welcome blurb, an HTML table summarising the six datasets (task type,
# #classes, #tokens/shot, #total tokens) and a link to the evaluation
# guideline on GitHub.
# NOTE: the identifier is spelled "LEADERBORAD"; the spelling is kept because
# other modules may import it under this name.
LEADERBORAD_INTRODUCTION = """# Long In-context Learning Leaderboard
    
    **"Which large language model is the BEST on long in-context learning task?"**<br>
    🏆 Welcome to the **LongICL** leaderboard! The leaderboard covers long in-context learning evaluation for popular long large language model.
    <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
    </div>
    The evaluation set from the following datasets are being included in the leaderboard.
    <table>
      <tr>
        <th><strong>Dataset</strong></th>
        <th>Task Type</th>
        <th>#Classes</th>
        <th>#Tokens/Shot</th>
        <th>#Total Tokens</th>
      </tr>
      <tr>
        <td><strong>GoEmotion</strong></td>
        <td>Emotion Classification</td>
        <td>28</td>
        <td>28</td>
        <td>[1K, 4K]</td>
      </tr>
      <tr>
        <td><strong>BANKING77</strong></td>
        <td>Intent Classification</td>
        <td>77</td>
        <td>28</td>
        <td>[2K, 11K]</td>
      </tr>
      <tr>
        <td><strong>TecRED</strong></td>
        <td>Relation Extraction</td>
        <td>41</td>
        <td>80</td>
        <td>[4K, 18K]</td>
      </tr>
      <tr>
        <td><strong>Few-NERD</strong></td>
        <td>Entity Recognition</td>
        <td>66</td>
        <td>61</td>
        <td>[5K, 23K]</td>
      </tr>
      <tr>
        <td><strong>DialogRE</strong></td>
        <td>Relation Extraction</td>
        <td>36</td>
        <td>226</td>
        <td>[8K, 32K]</td>
      </tr>
      <tr>
        <td><strong>Discovery</strong></td>
        <td>Discourse Marker Classification</td>
        <td>174</td>
        <td>61</td>
        <td>[10K, 50K]</td>
      </tr>
    </table>

    **"How to evaluate your model and submit your results?"**<br>
    Please refer to the guideline in <a href="https://github.com/TIGER-AI-Lab/LongICLBench/blob/main/README.md">Github</a> to evaluate your own model.

    """

# Placeholder text for the table section; it contains only whitespace
# (a newline followed by four spaces).
TABLE_INTRODUCTION = "\n    "

# HTML snippet listing, for each of the six datasets, a "Paper" link and a
# "Data" link.
# NOTE: the identifier is spelled "LEADERBORAD"; the spelling is kept because
# other modules may import it under this name.
LEADERBORAD_INFO = """
We list the information of the used datasets as follows:<br>

GoEmotion<br>
<a href='https://aclanthology.org/2020.acl-main.372/'>Paper</a><br>
<a href='https://huggingface.co/datasets/go_emotions'>Data</a><br>

BANKING77<br>
<a href='https://arxiv.org/abs/2003.04807'>Paper</a><br>
<a href='https://huggingface.co/datasets/banking77'>Data</a><br>

TecRED<br>
<a href='https://aclanthology.org/D17-1004/'>Paper</a><br>
<a href='https://nlp.stanford.edu/projects/tacred/#usage'>Data</a><br>

Few-NERD<br>
<a href='https://aclanthology.org/2021.acl-long.248/'>Paper</a><br>
<a href='https://github.com/thunlp/Few-NERD?tab=readme-ov-file#get-the-data'>Data</a>

DialogRE<br>
<a href='https://aclanthology.org/2020.acl-main.444/'>Paper</a><br>
<a href='https://github.com/nlpdata/dialogre'>Data</a>

Discovery<br>
<a href='https://aclanthology.org/N19-1351/'>Paper</a><br>
<a href='https://huggingface.co/datasets/discovery'>Data</a>
"""

# Label and BibTeX snippet for the "cite these results" widget.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# The entry must have balanced braces so it can be pasted straight into a
# .bib file; it is closed by exactly one "}".
CITATION_BUTTON_TEXT = r"""@article{Li2024LongcontextLS,
  title={Long-context LLMs Struggle with Long In-context Learning},
  author={Tianle Li and Ge Zhang and Quy Duc Do and Xiang Yue and Wenhu Chen},
  journal={ArXiv},
  year={2024},
  volume={abs/2404.02060},
  url={https://api.semanticscholar.org/CorpusID:268857023}
}"""

# Markdown instructions for submitting results. The fenced JSON example must
# itself be valid JSON (every member separated by a comma), because users copy
# it as a template and `add_new_eval` parses the upload with `json.loads`.
SUBMIT_INTRODUCTION = """# Submit on LongICL Leaderboard Introduction

## ⚠ Please note that you need to submit the json file with following format:

```json
{
    "Model": "[NAME]",
    "Repo": "https://huggingface.co/[MODEL_NAME]",
    "GoEmotion": 50,
    "BANKING77": 50,
    "TecRED": 50,
    "Few-NERD": 50,
    "DialogRE": 50,
    "Discovery": 50
}
```
After submitting, you can click the "Refresh" button to see the updated leaderboard(it may takes few seconds).

"""
def get_df():
    """Pull the submission repo and return the leaderboard sorted by average.

    Clones (or reuses) the local checkout named ``SUBMISSION_NAME``, pulls the
    latest commits, reads ``CSV_DIR`` and adds an ``Avg`` column holding the
    row-wise mean of the six dataset scores, rounded to one decimal place.

    Returns:
        pandas.DataFrame: the rows ordered by ``Avg`` (highest first),
        restricted to and ordered by ``COLUMN_NAMES``.
    """
    # repo_type is passed explicitly so this call matches the one in
    # add_new_eval, which opens the same dataset repo.
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    df = pd.read_csv(CSV_DIR)
    score_columns = ['GoEmotion', 'BANKING77', 'TecRED', 'Few-NERD', 'DialogRE', 'Discovery']
    df['Avg'] = df[score_columns].mean(axis=1).round(1)
    df = df.sort_values(by=['Avg'], ascending=False)
    return df[COLUMN_NAMES]

def add_new_eval(
    input_file,
):
    """Append an uploaded result to the submission CSV and push it to the Hub.

    Args:
        input_file: JSON document (anything ``json.loads`` accepts) with the
            keys "Model", "Repo", "GoEmotion", "BANKING77", "TecRED",
            "Few-NERD", "DialogRE" and "Discovery"; or ``None`` when nothing
            was uploaded.

    Returns:
        The string "Error! Empty file!" when ``input_file`` is ``None``;
        otherwise ``None`` (the outcome is reported with ``print``).

    Raises:
        json.JSONDecodeError: if ``input_file`` is not valid JSON.
        KeyError: if one of the required keys is missing.
    """
    if input_file is None:
        return "Error! Empty file!"

    upload_data = json.loads(input_file)
    score_keys = ('GoEmotion', 'BANKING77', 'TecRED', 'Few-NERD', 'DialogRE', 'Discovery')
    # The first CSV cell is a markdown link "[model name](repo url)"; it also
    # serves as the key for the duplicate check below.
    model_link = f'[{upload_data["Model"]}]({upload_data["Repo"]})'
    data_row = [model_link, *(upload_data[key] for key in score_keys)]

    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()

    # csv.reader yields [] for blank lines, so skip empty rows instead of
    # indexing them. UTF-8 is used explicitly so that reading and appending
    # do not depend on the platform's locale encoding.
    with open(CSV_DIR, mode='r', newline='', encoding='utf-8') as file:
        already_submitted = {row[0] for row in csv.reader(file, delimiter=',') if row}

    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)

        submission_repo.push_to_hub()
        print('Submission Successful')
    else:
        print('The entry already exists')


def refresh_data():
    """Return a freshly loaded leaderboard table by delegating to ``get_df``."""
    latest_table = get_df()
    return latest_table