Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
KlaudiaTH
committed on
Commit
•
a200cc8
1
Parent(s):
8fcff38
Reformatted
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
import core as core
|
4 |
-
from style import CSS, T_SYMBOLS, TITLE
|
5 |
|
6 |
demo = gr.Blocks(css=CSS)
|
7 |
with demo:
|
@@ -38,7 +38,7 @@ with demo:
|
|
38 |
)
|
39 |
with gr.Row():
|
40 |
langs_bar = gr.CheckboxGroup(
|
41 |
-
choices=[(LANG_SYMBOLS.get(l,l),l) for l in core.languages_list],
|
42 |
value=core.languages_list,
|
43 |
label="Select languages to average over",
|
44 |
elem_id="column-select",
|
@@ -52,9 +52,7 @@ with demo:
|
|
52 |
size="sm",
|
53 |
scale=1,
|
54 |
)
|
55 |
-
select = gr.Button(
|
56 |
-
value="Select all languages", size="sm", scale=1
|
57 |
-
)
|
58 |
|
59 |
def update_bar(selected_tab):
|
60 |
if selected_tab in [0, 1]:
|
@@ -88,14 +86,10 @@ with demo:
|
|
88 |
label="Select evaluation type",
|
89 |
scale=29,
|
90 |
)
|
91 |
-
clear = gr.ClearButton(
|
92 |
-
shown_tasks, value="Deselect all tasks", size="sm", scale=21
|
93 |
-
)
|
94 |
|
95 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
96 |
-
with gr.TabItem(
|
97 |
-
"🏅 LLM accuracy benchmark", elem_id="llm-benchmark-tab-table-acc", id=0
|
98 |
-
) as acc:
|
99 |
leaderboard_table = gr.Dataframe()
|
100 |
with gr.TabItem(
|
101 |
"๐ LLM translation benchmark",
|
@@ -106,7 +100,7 @@ with demo:
|
|
106 |
|
107 |
demo.load(
|
108 |
core.update_task_groups_and_fewshot,
|
109 |
-
[gr.State(value=0), model_types, langs_bar,fewshot],
|
110 |
[shown_tasks, fewshot, selected_tab, model_types, langs_bar],
|
111 |
)
|
112 |
fewshot.change(
|
@@ -142,7 +136,6 @@ with demo:
|
|
142 |
leaderboard_table_misc,
|
143 |
)
|
144 |
|
145 |
-
|
146 |
gr.Blocks.load(
|
147 |
block=demo,
|
148 |
fn=core.update_df,
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
import core as core
|
4 |
+
from style import CSS, LANG_SYMBOLS, T_SYMBOLS, TITLE
|
5 |
|
6 |
demo = gr.Blocks(css=CSS)
|
7 |
with demo:
|
|
|
38 |
)
|
39 |
with gr.Row():
|
40 |
langs_bar = gr.CheckboxGroup(
|
41 |
+
choices=[(LANG_SYMBOLS.get(l, l), l) for l in core.languages_list],
|
42 |
value=core.languages_list,
|
43 |
label="Select languages to average over",
|
44 |
elem_id="column-select",
|
|
|
52 |
size="sm",
|
53 |
scale=1,
|
54 |
)
|
55 |
+
select = gr.Button(value="Select all languages", size="sm", scale=1)
|
|
|
|
|
56 |
|
57 |
def update_bar(selected_tab):
|
58 |
if selected_tab in [0, 1]:
|
|
|
86 |
label="Select evaluation type",
|
87 |
scale=29,
|
88 |
)
|
89 |
+
clear = gr.ClearButton(shown_tasks, value="Deselect all tasks", size="sm", scale=21)
|
|
|
|
|
90 |
|
91 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
92 |
+
with gr.TabItem("🏅 LLM accuracy benchmark", elem_id="llm-benchmark-tab-table-acc", id=0) as acc:
|
|
|
|
|
93 |
leaderboard_table = gr.Dataframe()
|
94 |
with gr.TabItem(
|
95 |
"๐ LLM translation benchmark",
|
|
|
100 |
|
101 |
demo.load(
|
102 |
core.update_task_groups_and_fewshot,
|
103 |
+
[gr.State(value=0), model_types, langs_bar, fewshot],
|
104 |
[shown_tasks, fewshot, selected_tab, model_types, langs_bar],
|
105 |
)
|
106 |
fewshot.change(
|
|
|
136 |
leaderboard_table_misc,
|
137 |
)
|
138 |
|
|
|
139 |
gr.Blocks.load(
|
140 |
block=demo,
|
141 |
fn=core.update_df,
|
core.py
CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
|
|
7 |
from datasets import load_dataset
|
8 |
|
9 |
import style
|
10 |
-
from style import
|
11 |
|
12 |
ZERO_SHOT_ONLY = ["BELEBELE"]
|
13 |
FEW_SHOT_ONLY = ["GSM8K", "TruthfulQA"]
|
@@ -115,7 +115,7 @@ def update_df(
|
|
115 |
# aggregate results over languages per task
|
116 |
df = aggregate_langs(df, tasks, langs)
|
117 |
|
118 |
-
df = df.sort_values(by=
|
119 |
|
120 |
# filter models by search bar and model type
|
121 |
df = search_model(df, model_query)
|
@@ -127,7 +127,12 @@ def update_df(
|
|
127 |
return sort_cols(df, fewshot)
|
128 |
|
129 |
|
130 |
-
def update_task_groups_and_fewshot(
|
|
|
|
|
|
|
|
|
|
|
131 |
selected_task_type = get_selected_task_type(current_selected_tab)
|
132 |
available_tasks = get_available_task_groups(selected_task_type, is_fewshot_current)
|
133 |
new_selected_tasks = available_tasks.copy()
|
@@ -159,7 +164,7 @@ def update_task_groups_and_fewshot(current_selected_tab: int, model_types, langs
|
|
159 |
(f"Chat {T_SYMBOLS['chat']}", T_SYMBOLS["chat"]),
|
160 |
],
|
161 |
value=list(T_SYMBOLS.values()),
|
162 |
-
interactive=True
|
163 |
)
|
164 |
langs_bar = gr.CheckboxGroup(
|
165 |
choices=[(LANG_SYMBOLS.get(l, l), l) for l in languages_list],
|
|
|
7 |
from datasets import load_dataset
|
8 |
|
9 |
import style
|
10 |
+
from style import LANG_SYMBOLS, T_SYMBOLS
|
11 |
|
12 |
ZERO_SHOT_ONLY = ["BELEBELE"]
|
13 |
FEW_SHOT_ONLY = ["GSM8K", "TruthfulQA"]
|
|
|
115 |
# aggregate results over languages per task
|
116 |
df = aggregate_langs(df, tasks, langs)
|
117 |
|
118 |
+
df = df.sort_values(by="Average", ascending=False)
|
119 |
|
120 |
# filter models by search bar and model type
|
121 |
df = search_model(df, model_query)
|
|
|
127 |
return sort_cols(df, fewshot)
|
128 |
|
129 |
|
130 |
+
def update_task_groups_and_fewshot(
|
131 |
+
current_selected_tab: int,
|
132 |
+
model_types,
|
133 |
+
langs_bar,
|
134 |
+
is_fewshot_current: bool = False,
|
135 |
+
):
|
136 |
selected_task_type = get_selected_task_type(current_selected_tab)
|
137 |
available_tasks = get_available_task_groups(selected_task_type, is_fewshot_current)
|
138 |
new_selected_tasks = available_tasks.copy()
|
|
|
164 |
(f"Chat {T_SYMBOLS['chat']}", T_SYMBOLS["chat"]),
|
165 |
],
|
166 |
value=list(T_SYMBOLS.values()),
|
167 |
+
interactive=True,
|
168 |
)
|
169 |
langs_bar = gr.CheckboxGroup(
|
170 |
choices=[(LANG_SYMBOLS.get(l, l), l) for l in languages_list],
|
style.py
CHANGED
@@ -11,10 +11,7 @@ CSS = """
|
|
11 |
}
|
12 |
"""
|
13 |
|
14 |
-
T_SYMBOLS = {
|
15 |
-
"pretrained": "🟢",
|
16 |
-
"chat": "💬"
|
17 |
-
}
|
18 |
|
19 |
LANG_SYMBOLS = {
|
20 |
"BG": "🇧🇬 BG",
|
@@ -37,6 +34,5 @@ LANG_SYMBOLS = {
|
|
37 |
"RO": "🇷🇴 RO",
|
38 |
"SK": "🇸🇰 SK",
|
39 |
"SL": "🇸🇮 SL",
|
40 |
-
"SV": "🇸🇪 SV"
|
41 |
}
|
42 |
-
|
|
|
11 |
}
|
12 |
"""
|
13 |
|
14 |
+
T_SYMBOLS = {"pretrained": "🟢", "chat": "💬"}
|
|
|
|
|
|
|
15 |
|
16 |
LANG_SYMBOLS = {
|
17 |
"BG": "🇧🇬 BG",
|
|
|
34 |
"RO": "🇷🇴 RO",
|
35 |
"SK": "🇸🇰 SK",
|
36 |
"SL": "🇸🇮 SL",
|
37 |
+
"SV": "🇸🇪 SV",
|
38 |
}
|
|