comparator / src /details.py
albertvillanova's picture
Make the code robust against HTTP errors
719c272 verified
raw
history blame
4.21 kB
import asyncio
import gradio as gr
import pandas as pd
import src.constants as constants
from src.hub import glob, load_jsonlines_file
def update_task_description_component(task):
    """Build the "Task Description" textbox for the given task.

    The text is the entry for ``task`` in ``constants.TASK_DESCRIPTIONS``
    followed by a blank line and a fixed note about score direction. When
    the task has no (or an empty) description, only the note is shown.
    """
    score_note = "A higher score is a better score."
    task_text = constants.TASK_DESCRIPTIONS.get(task, "")
    if task_text:
        text = f"{task_text}\n\n{score_note}"
    else:
        text = score_note
    return gr.Textbox(text, label="Task Description", lines=6, visible=True)
def update_subtasks_component(task, profile: gr.OAuthProfile | None):
    """Build the login button and the subtask selector for the given task.

    The login button is visible only for the "leaderboard_gpqa" task. For
    that task the subtask choices are withheld (``None``) while ``profile``
    is falsy; in every other case the choices come from
    ``constants.SUBTASKS``.

    Returns:
        A ``(gr.LoginButton, gr.Radio)`` tuple; the radio has no value
        selected.
    """
    needs_login = task == "leaderboard_gpqa"
    if needs_login and not profile:
        choices = None
    else:
        choices = constants.SUBTASKS.get(task)

    login_button = gr.LoginButton(size="sm", visible=needs_login)
    subtask_radio = gr.Radio(
        choices=choices,
        info="Evaluation subtasks to be loaded",
        value=None,
    )
    return login_button, subtask_radio
def update_load_details_component(model_id, subtask):
    """Return the "Load Details" button, enabled only when both inputs are set.

    The button is interactive when ``model_id`` and ``subtask`` are both
    truthy, and disabled otherwise.
    """
    ready = bool(model_id and subtask)
    return gr.Button("Load Details", interactive=ready)
def fetch_details_paths(model_id, subtask):
    """Look up the details files available for a model and subtask.

    Every "/" in ``model_id`` is replaced by "__" before the result is
    substituted into ``constants.DETAILS_DATASET_ID``; ``subtask`` is
    substituted into ``constants.DETAILS_FILENAME``. The two parts are
    joined around a recursive ``**`` wildcard and passed to ``glob``.

    Returns:
        Whatever ``glob`` returns for the resulting pattern.
    """
    sanitized_name = model_id.replace("/", "__")
    repo = constants.DETAILS_DATASET_ID.format(model_name_sanitized=sanitized_name)
    target_file = constants.DETAILS_FILENAME.format(subtask=subtask)
    pattern = "/".join((repo, "**", target_file))
    return glob(pattern)
async def load_details_dataframe(model_id, subtask):
    """Load the details of one model on one subtask as a dataframe.

    Returns ``None`` when either argument is falsy, when no details file is
    found, or when the file yields no data. Otherwise the records are
    flattened with ``pd.json_normalize``, tagged with the model identifier,
    sorted by "doc_id" and indexed by ("doc_id", "model_name"); "doc_id" is
    also kept as a regular column.
    """
    if not (model_id and subtask):
        return None

    # The path lookup is synchronous, so run it in a worker thread.
    candidates = await asyncio.to_thread(fetch_details_paths, model_id, subtask)
    if not candidates:
        return None

    # Greatest path in sort order - presumably the most recent results file;
    # confirm against the dataset layout.
    latest = max(candidates)
    records = await load_jsonlines_file(latest)
    if not records:
        return None

    frame = pd.json_normalize(records)
    # Keep track of which model each row belongs to.
    frame["model_name"] = model_id
    frame = frame.sort_values("doc_id")
    frame = frame.set_index("doc_id", drop=False)
    return frame.set_index("model_name", append=True)
async def load_details(subtask, *model_ids_lists):
    """Load and concatenate the details of several models for one subtask.

    Args:
        subtask: Subtask whose details are loaded.
        *model_ids_lists: Any number of collections of model identifiers;
            falsy collections (``None``, empty) are skipped.

    Returns:
        The concatenation of every dataframe that could be loaded, or
        ``None`` when none was loaded.
    """
    pending = [
        load_details_dataframe(model_id, subtask)
        for model_ids in filter(None, model_ids_lists)
        for model_id in model_ids
    ]
    # All models are fetched concurrently.
    results = await asyncio.gather(*pending)
    loaded = [frame for frame in results if frame is not None]
    if not loaded:
        return None
    return pd.concat(loaded)
def display_details(df, sample_idx, show_only_differences):
if df is None:
return
df = df.loc[df.index.levels[0][sample_idx]]
df = df.T.rename_axis(columns=None)
# Style
# - Option: Show only differences
any_difference = pd.Series(False, index=df.index)
if show_only_differences:
any_difference = df.ne(df.iloc[:, 0], axis=0).any(axis=1)
return (
df.style.format(escape="html", na_rep="")
# .hide(axis="index")
# Hide non-different rows
.hide([row for row in df.index if show_only_differences and not any_difference[row]])
# Fix overflow
.set_table_styles(
[
{
"selector": "td",
"props": [("overflow-wrap", "break-word"), ("max-width", "1px")],
},
{
"selector": ".col_heading",
"props": [("width", f"{100 / len(df.columns)}%")],
},
]
)
.to_html()
)
def update_sample_idx_component(df):
    """Build the "Sample Index" number input for a loaded details dataframe.

    The upper bound is the last valid position in the first index level
    when ``df`` has a MultiIndex, because samples are selected by position
    in that level; each sample has one row per model, so the row count
    would overshoot. For any other index the row count is used. The bound
    is never below the minimum of 0.

    Args:
        df: Details dataframe, or ``None`` when nothing has been loaded.

    Returns:
        A visible ``gr.Number`` reset to 0, or ``None`` when ``df`` is
        ``None``.
    """
    if df is None:
        return None

    index = df.index
    if isinstance(index, pd.MultiIndex):
        sample_count = len(index.levels[0])
    else:
        sample_count = len(df)
    # Clamp so an empty frame does not yield maximum < minimum.
    maximum = max(sample_count - 1, 0)

    return gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        maximum=maximum,
        visible=True,
    )
def clear_details():
    """Return the reset values for the details section of the UI.

    The tuple holds, in order: an emptied dropdown, three ``None`` values,
    a disabled "Load Details" button and a hidden sample-index input reset
    to 0.
    """
    empty_models = gr.Dropdown(value=[])
    disabled_load_btn = gr.Button("Load Details", interactive=False)
    hidden_sample_idx = gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        visible=False,
    )
    # Order: model_ids, details_dataframe, details_task, subtask,
    # load_details_btn, sample_idx
    return empty_models, None, None, None, disabled_load_btn, hidden_sample_idx
def display_loading_message_for_details():
    """Return the centred HTML placeholder shown while details are loading."""
    loading_html = "<h3 style='text-align: center;'>Loading...</h3>"
    return loading_html