# Hugging Face Space status: Running on CPU Upgrade
import gradio as gr

from src.constants import SUBTASKS, TASKS
from src.details import (
    clear_details,
    display_details,
    load_details_dataframes,
    update_load_details_component,
    update_sample_idx_component,
    update_subtasks_component,
)
from src.results import (
    clear_results,
    display_results,
    fetch_result_paths,
    filter_latest_result_path_per_model,
    load_results_dataframes,
    update_load_results_component,
    update_tasks_component,
)
# Gradio app that lets the user pick two models and compare their results
# ("Results" tab) and per-sample details ("Details" tab).
#
# NOTE: the script body is deliberately left at module level; the original
# `if __name__ == "__main__":` guard was commented out, so `demo` is built
# and `demo.launch()` is called whenever this module is executed.

# Mapping whose keys populate both model dropdowns; computed once at start-up.
latest_result_path_per_model = filter_latest_result_path_per_model(fetch_result_paths())

# Choice lists shared by the paired widgets below (built once instead of twice).
model_choices = list(latest_result_path_per_model.keys())
task_choices = ["All", *TASKS.values()]

with gr.Blocks(fill_height=True) as demo:
    # Page header. The emoji was previously stored as mis-decoded bytes.
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")

    # --- Model selection: one column per model ---------------------------
    # The hidden dataframes hold each model's loaded results between events.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_2 = gr.Dataframe(visible=False)

    # --- Output area -----------------------------------------------------
    # NOTE(review): the nesting of the inner "Results"/"Configs" tabs under the
    # outer "Results" tab is reconstructed from the statement order — confirm
    # against the deployed layout.
    with gr.Row():
        with gr.Tab("Results"):
            # Starts non-interactive; it is an output of update_tasks_component,
            # which runs after the results have been loaded.
            task = gr.Radio(
                task_choices,
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                interactive=False,
            )
            load_results_btn = gr.Button("Load Results", interactive=False)
            clear_results_btn = gr.Button("Clear Results")
            with gr.Tab("Results"):
                results = gr.HTML()
            with gr.Tab("Configs"):
                configs = gr.HTML()
        with gr.Tab("Details"):
            details_task = gr.Radio(
                task_choices,
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                interactive=True,
            )
            # Initial choices are looked up from the task selected at build
            # time ("All"); they are refreshed on every details_task change.
            subtask = gr.Radio(
                SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be displayed (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            # Hidden at build time; it is the output of
            # update_sample_idx_component once details are loaded.
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False
            )
            details = gr.HTML()
            # Hidden holders for each model's loaded details.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            # NOTE(review): not wired to any event in this file — kept as-is.
            details_dataframe = gr.DataFrame(visible=False)

    # --- Results wiring --------------------------------------------------
    # Either dropdown changing refreshes the "Load Results" button, mirroring
    # the "Load Details" wiring below (which also listens to both dropdowns).
    gr.on(
        triggers=[model_id_1.change, model_id_2.change],
        fn=update_load_results_component,
        outputs=load_results_btn,
    )
    # Load both result dataframes, then refresh the task selector.
    load_results_btn.click(
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=task,
    )
    # Re-render whenever the loaded data or the selected task changes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, task.change],
        fn=display_results,
        inputs=[task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )
    clear_results_btn.click(
        fn=clear_results,
        outputs=[model_id_1, model_id_2, dataframe_1, dataframe_2, task],
    )

    # --- Details wiring --------------------------------------------------
    details_task.change(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )
    gr.on(
        triggers=[model_id_1.change, model_id_2.change, subtask.change, details_task.change],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )
    # Load both details dataframes, then refresh the sample-index widget.
    load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )
    # Re-render whenever the loaded details or the selected sample changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )
    clear_details_btn.click(
        fn=clear_details,
        outputs=[model_id_1, model_id_2, details_dataframe_1, details_dataframe_2, details_task, subtask, sample_idx],
    )

demo.launch()