laiviet committed
Commit f067bfb
Parent: bbc4eb6

Fix contents

Files changed (2):
  1. app.py +7 -4
  2. content.py +36 -0
app.py CHANGED
@@ -3,7 +3,7 @@ import json
 import glob
 from collections import defaultdict
 import gradio as gr
-
+from content import *
 import glob
 
 ARC = "arc_challenge"
@@ -80,7 +80,7 @@ LANG_COL = "Language"
 AVERAGE_COL = "Average"
 ARC_COL = "ARC (25-shot)"
 HELLASWAG_COL = "HellaSwag (10-shot)"
-MMLU_COL = "MMLU (5-shot))"
+MMLU_COL = "MMLU (5-shot)"
 TRUTHFULQA_COL = "TruthfulQA (0-shot)"
 
 COLS = [MODEL_COL, LANG_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL]
@@ -91,8 +91,9 @@ leaderboard_df = get_leaderboard_df(*args)
 
 demo = gr.Blocks()
 with demo:
-    gr.HTML('Open Multilingual Large Language Model Evaluation Leaderboard')
-    # gr.Markdown('INTRODUCTION TEXT', elem_classes="markdown-text")
+    gr.HTML(TITLE)
+    gr.Markdown(INTRO_TEXT, elem_classes="markdown-text")
+    gr.Markdown(HOW_TO, elem_classes="markdown-text")
 
     with gr.Box():
         search_bar = gr.Textbox(
@@ -107,4 +108,6 @@ with demo:
             elem_id="leaderboard-table",
         )
 
+    gr.Markdown(CITATION, elem_classes="markdown-text")
+
 demo.launch()
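For orientation, here is a minimal sketch of how app.py fits together after this commit. It is a reconstruction from the diff's context lines, not the real file: the body of get_leaderboard_df, the sample row, the value of MODEL_COL, the Textbox placeholder, and the search-bar wiring are all assumptions, and it targets Gradio 3.x (where gr.Box is still available).

```python
# Minimal sketch of app.py after this commit. Assumptions (not in the diff):
# the body of get_leaderboard_df, the sample data, MODEL_COL's value, and the
# search-bar wiring. Targets Gradio 3.x, where gr.Box still exists.
import gradio as gr
import pandas as pd

# The commit itself uses `from content import *`; explicit names are used
# here so the sketch is self-describing.
from content import TITLE, INTRO_TEXT, HOW_TO, CITATION

MODEL_COL = "Model"  # assumed value; only the constant's name appears in the diff
LANG_COL = "Language"
AVERAGE_COL = "Average"
ARC_COL = "ARC (25-shot)"
HELLASWAG_COL = "HellaSwag (10-shot)"
MMLU_COL = "MMLU (5-shot)"
TRUTHFULQA_COL = "TruthfulQA (0-shot)"
COLS = [MODEL_COL, LANG_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL]


def get_leaderboard_df() -> pd.DataFrame:
    # Stand-in for the app's real loader, which aggregates per-language results.
    return pd.DataFrame(
        [["example-7b", "en", 49.9, 48.1, 61.2, 42.3, 48.0]], columns=COLS
    )


leaderboard_df = get_leaderboard_df()

demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRO_TEXT, elem_classes="markdown-text")
    gr.Markdown(HOW_TO, elem_classes="markdown-text")

    with gr.Box():
        search_bar = gr.Textbox(placeholder="Search models...", show_label=False)
        table = gr.components.Dataframe(
            value=leaderboard_df,
            headers=COLS,
            elem_id="leaderboard-table",
        )

    def filter_table(query: str) -> pd.DataFrame:
        # Case-insensitive substring match on the model name.
        if not query:
            return leaderboard_df
        mask = leaderboard_df[MODEL_COL].str.contains(query, case=False, regex=False)
        return leaderboard_df[mask]

    search_bar.change(filter_table, inputs=search_bar, outputs=table)

    gr.Markdown(CITATION, elem_classes="markdown-text")

demo.launch()
```

Moving TITLE, INTRO_TEXT, HOW_TO, and CITATION into content.py keeps the page copy editable without touching the layout code, which is presumably why the hard-coded gr.HTML string and the commented-out introduction were replaced.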
content.py ADDED
@@ -0,0 +1,36 @@
+TITLE = '<h1 align="center" id="space-title">Open Multilingual LLM Evaluation Leaderboard</h1>'
+
+INTRO_TEXT = f"""
+## About
+
+This leaderboard shows the performance of pretrained models in 29 languages on four benchmarks:
+
+- <a href="https://arxiv.org/abs/1803.05457" target="_blank">AI2 Reasoning Challenge</a> (25-shot)
+- <a href="https://arxiv.org/abs/1905.07830" target="_blank">HellaSwag</a> (10-shot)
+- <a href="https://arxiv.org/abs/2009.03300" target="_blank">MMLU</a> (5-shot)
+- <a href="https://arxiv.org/abs/2109.07958" target="_blank">TruthfulQA</a> (0-shot)
+
+The evaluation data was translated into 29 languages using ChatGPT.
+
+"""
+
+HOW_TO = f"""
+## How to list your model's performance on this leaderboard
+
+Send an email with the title [Open mLLM Leaderboard] to vietl@uoregon.edu with the name of your Hugging Face model.
+
+We will run your model on the four benchmarks and add it to the leaderboard.
+"""
+
+CITATION = f"""
+## Citation
+
+```
+
+@misc{{lai2023openllmbenchmark,
+  author = {{Viet Lai and Nghia Trung Ngo and Amir Pouran Ben Veyseh and Franck Dernoncourt and Thien Huu Nguyen}},
+  title = {{Open Multilingual LLM Evaluation Leaderboard}},
+  year = {{2023}}
+}}
+```
+"""