File size: 1,495 Bytes
f067bfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90fafdc
 
 
 
 
 
 
 
 
 
 
 
 
f067bfb
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Page heading, rendered as centered HTML above the leaderboard table.
TITLE = "<h1 align=\"center\" id=\"space-title\">Open Multilingual LLM Evaluation Leaderboard</h1>"

# Markdown for the "About" section: the four benchmarks and their shot counts.
# Plain string, not an f-string — there are no placeholders to interpolate.
INTRO_TEXT = """
## About

This leaderboard shows the performance of pretrained models in 29 languages on four benchmarks:

- <a href="https://arxiv.org/abs/1803.05457" target="_blank">  AI2 Reasoning Challenge </a> (25-shot)
- <a href="https://arxiv.org/abs/1905.07830" target="_blank">  HellaSwag </a> (10-shot)
- <a href="https://arxiv.org/abs/2009.03300" target="_blank">  MMLU </a>  (5-shot)
- <a href="https://arxiv.org/abs/2109.07958" target="_blank">  TruthfulQA </a> (0-shot)

The evaluation data was translated into 29 languages using ChatGPT.

"""

# Markdown instructions for submitting a model for evaluation.
# Plain string, not an f-string — no placeholders. Fixed the
# "Loaderboard" -> "Leaderboard" typo in the suggested email subject.
HOW_TO = """
## How to list your model performance on this leaderboard:

Send an email with title [Open mLLM Leaderboard] to vietl@uoregon.edu with the huggingface's model name.

We will run your model on the four benchmarks and add it to the leaderboard.
"""

# Markdown acknowledgements section. Plain string, not an f-string —
# no placeholders. Corrected the project names: the evaluation harness
# is EleutherAI's lm-evaluation-harness, and the leaderboard template
# comes from the HuggingFaceH4 organization's open_llm_leaderboard.
CREDIT = """
## Credit

To make this website, we use the following resources:

- Datasets (AI2_ARC, HellaSwag, MMLU, TruthfulQA)
- Funding and GPU access (Adobe Research)
- Evaluation code (EleutherAI's lm-evaluation-harness repo)
- Leaderboard code (HuggingFaceH4's open_llm_leaderboard repo)

"""


# BibTeX citation rendered inside a Markdown code fence. Plain string
# instead of an f-string, so the BibTeX braces no longer need to be
# doubled ({{...}}); the rendered output is unchanged.
CITATION = """
## Citation

```

@misc{lai2023openllmbenchmark,
    author = {Viet Lai and Nghia Trung Ngo and Amir Pouran Ben Veyseh and Franck Dernoncourt and Thien Huu Nguyen},
    title={Open Multilingual LLM Evaluation Leaderboard},
    year={2023}
}
```
"""