TITLE = '<h1 align="center" id="space-title">Open Multilingual LLM Evaluation Leaderboard</h1>'
INTRO_TEXT = f"""
## About
This leaderboard shows the performance of pretrained models in 29 languages on four benchmarks:
- <a href="https://arxiv.org/abs/1803.05457" target="_blank"> AI2 Reasoning Challenge </a> (25-shot)
- <a href="https://arxiv.org/abs/1905.07830" target="_blank"> HellaSwag </a> (10-shot)
- <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot)
- <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot)
The evaluation data for all four benchmarks was translated into these 29 languages using ChatGPT.
"""
HOW_TO = f"""
## How to list your model's performance on this leaderboard:
Send an email with the subject [Open mLLM Leaderboard] to vietl@uoregon.edu, including your model's name on Hugging Face.
We will run your model on the four benchmarks and add it to the leaderboard.
"""
CREDIT = f"""
## Credit
To build this leaderboard, we use the following resources:
- Datasets (AI2_ARC, HellaSwag, MMLU, TruthfulQA)
- Funding and GPU access (Adobe Research)
- Evaluation code (EleutherAI's lm_evaluation_harness repo)
- Leaderboard code (HuggingFaceH4's open_llm_leaderboard repo)
"""
CITATION = f"""
## Citation
```
@misc{{lai2023openllmbenchmark,
    author = {{Viet Lai and Nghia Trung Ngo and Amir Pouran Ben Veyseh and Franck Dernoncourt and Thien Huu Nguyen}},
    title = {{Open Multilingual LLM Evaluation Leaderboard}},
    year = {{2023}},
}}
```
"""