# Hugging Face Spaces status banner (page residue): Running on CPU Upgrade
"""Gradio front end for a multilingual LLM leaderboard.

The page has two tabs -- an accuracy benchmark and a translation benchmark.
Each tab offers a model search box, a model-type filter, a language filter and
a task filter; every change is forwarded to ``core.update_df``, whose result
is shown in the tab's dataframe.
"""

import gradio as gr

import core as core
from style import CSS, LANG_SYMBOLS, T_SYMBOLS, TITLE

# NOTE(review): the leading "π" here and in the two tab titles below looks
# like a mis-decoded emoji. The strings are kept byte-for-byte; confirm the
# intended glyphs against the upstream file before changing them.
SEARCH_PLACEHOLDER = " π Separate multiple queries with ';' and press ENTER..."


def _available_task_groups(task_type_index, flag):
    """Return the task groups for one tab, as computed by ``core``.

    Args:
        task_type_index: Index handed to ``core.get_selected_task_type``
            (0 for the accuracy tab, 1 for the translation tab).
        flag: Boolean forwarded unchanged as the second argument of
            ``core.get_available_task_groups`` (True for the accuracy tab,
            False for the translation tab); its meaning is defined in
            ``core``.
    """
    return core.get_available_task_groups(
        core.get_selected_task_type(task_type_index), flag
    )


def _build_leaderboard_tab(task_type_index, flag):
    """Create the filter widgets and result table of one leaderboard tab.

    Must be called inside the ``gr.TabItem`` context the widgets belong to,
    because Gradio attaches new components to the currently open context.

    Args:
        task_type_index: See ``_available_task_groups``.
        flag: See ``_available_task_groups``.

    Returns:
        Tuple ``(search_bar, model_types, langs_bar, shown_tasks,
        leaderboard_table)`` of the created components.
    """
    with gr.Column():
        with gr.Row():
            search_bar = gr.Textbox(
                label="Search models",
                placeholder=SEARCH_PLACEHOLDER,
                show_label=True,
                elem_id="search-bar",
            )
            model_types = gr.CheckboxGroup(
                label="Select model type",
                choices=[
                    (
                        f"Pretrained {T_SYMBOLS['pretrained']}",
                        T_SYMBOLS["pretrained"],
                    ),
                    (f"Chat {T_SYMBOLS['chat']}", T_SYMBOLS["chat"]),
                ],
                value=list(T_SYMBOLS.values()),
            )
        with gr.Row():
            langs_bar = gr.CheckboxGroup(
                # Fall back to the raw language code when no symbol is known.
                choices=[
                    (LANG_SYMBOLS.get(lang, lang), lang)
                    for lang in core.languages_list
                ],
                value=core.languages_list,
                label="Select languages to average over",
                elem_id="column-select",
                interactive=True,
                scale=6,
            )
            with gr.Column(scale=1):
                gr.ClearButton(
                    langs_bar,
                    value="Deselect all languages",
                    size="sm",
                    scale=1,
                )
                select_all_langs = gr.Button(
                    value="Select all languages",
                    size="sm",
                    scale=1,
                )
                select_all_langs.click(
                    lambda: gr.CheckboxGroup(value=core.languages_list),
                    inputs=[],
                    outputs=langs_bar,
                )
        with gr.Row():
            # Computed once and reused for both the choices and the default.
            task_groups = _available_task_groups(task_type_index, flag)
            shown_tasks = gr.CheckboxGroup(
                choices=task_groups,
                value=task_groups,
                label="Select tasks to show",
                elem_id="column-select",
                interactive=True,
                scale=50,
            )
            gr.ClearButton(
                shown_tasks,
                value="Deselect all tasks",
                size="sm",
                scale=1,
            )
            select_all_tasks = gr.Button(
                value="Select all tasks",
                size="sm",
                scale=1,
            )
            select_all_tasks.click(
                # Re-queried on every click, exactly as the original lambda did.
                lambda: gr.CheckboxGroup(
                    value=_available_task_groups(task_type_index, flag)
                ),
                inputs=[],
                outputs=shown_tasks,
            )
    # NOTE(review): the source lost its indentation, so whether the table sat
    # inside the column above or directly in the tab is a guess -- confirm.
    leaderboard_table = gr.Dataframe()
    return search_bar, model_types, langs_bar, shown_tasks, leaderboard_table


demo = gr.Blocks(css=CSS)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(
        # Adjacent literals with an explicit space: a backslash continuation
        # inside the string would glue the sentences together or embed the
        # source indentation in the text.
        "This is a collection of multilingual evaluation results obtained using our fork of the LM-evaluation-harness (https://github.com/OpenGPTX/lm-evaluation-harness), based on V1 of the https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard. "
        "Note that currently, benchmarks are available in 21 European languages (Irish, Maltese, Croatian missing).",
        elem_classes="markdown-text",
    )

    # Not read anywhere in this file; kept so the module-level name survives.
    selected_tab = gr.State(value=0)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem(
            "π LLM accuracy benchmark",
            elem_id="llm-benchmark-tab-table-acc",
            id=0,
        ) as acc:
            (
                search_bar,
                model_types,
                langs_bar,
                shown_tasks,
                leaderboard_table,
            ) = _build_leaderboard_tab(0, True)
        with gr.TabItem(
            "π LLM translation benchmark",
            elem_id="llm-benchmark-tab-table-misc",
            id=1,
        ) as misc:
            (
                search_bar_misc,
                model_types_misc,
                langs_bar_misc,
                shown_tasks_misc,
                leaderboard_table_misc,
            ) = _build_leaderboard_tab(1, False)

    # Inputs handed to core.update_df for each tab. The trailing State is a
    # constant boolean (True for the accuracy tab, False for the translation
    # tab); it is only ever used as an input, so one instance per tab is
    # shared by all of that tab's listeners.
    acc_inputs = [
        shown_tasks,
        search_bar,
        langs_bar,
        model_types,
        gr.State(value=True),
    ]
    misc_inputs = [
        shown_tasks_misc,
        search_bar_misc,
        langs_bar_misc,
        model_types_misc,
        gr.State(value=False),
    ]
    tab_wiring = (
        (acc_inputs, leaderboard_table),
        (misc_inputs, leaderboard_table_misc),
    )

    # Refresh a tab's table when its search is submitted or a filter changes.
    for update_inputs, table in tab_wiring:
        tasks_box, search_box, langs_box, types_box, _ = update_inputs
        search_box.submit(core.update_df, update_inputs, table)
        for checkbox_group in (langs_box, tasks_box, types_box):
            checkbox_group.change(core.update_df, update_inputs, table)

    # Fill both tables once when the page is first loaded.
    for update_inputs, table in tab_wiring:
        demo.load(fn=core.update_df, inputs=update_inputs, outputs=table)

demo.launch()