File size: 3,982 Bytes
2f4d877
841e241
 
 
 
30a0c61
7e32ac7
841e241
 
ca2b34f
651545d
 
 
ca2b34f
651545d
ca2b34f
e970061
ca2b34f
 
 
 
26ef426
 
 
 
 
 
 
 
 
 
841e241
 
 
bea7063
 
841e241
 
 
 
 
7e32ac7
 
 
 
 
 
 
 
2f4d877
841e241
 
7e32ac7
841e241
 
 
fae0e19
841e241
bea7063
 
 
841e241
 
bea7063
 
 
 
841e241
 
bea7063
 
841e241
bea7063
 
611a3ed
d46be0d
 
 
6cf57e4
 
d46be0d
841e241
611a3ed
841e241
6cf57e4
 
d46be0d
 
 
 
 
 
bea7063
 
 
 
 
d46be0d
 
841e241
 
 
 
bea7063
 
 
 
841e241
 
 
 
 
 
 
 
 
 
 
bea7063
841e241
bea7063
611a3ed
 
 
1c1cb58
611a3ed
841e241
8f7c83f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import asyncio

import gradio as gr
import pandas as pd

import src.constants as constants
from src.hub import glob, load_jsonlines_file


def update_task_description_component(task):
    """Build the "Task Description" textbox for the given task.

    The text is the entry for ``task`` in ``constants.TASK_DESCRIPTIONS``
    followed by a fixed scoring note; when there is no entry (or it is
    empty), only the scoring note is shown.
    """
    scoring_note = "A higher score is a better score."
    task_text = constants.TASK_DESCRIPTIONS.get(task, "")
    if task_text:
        text = f"{task_text}\n\n{scoring_note}"
    else:
        text = scoring_note
    return gr.Textbox(text, label="Task Description", lines=6, visible=True)


def update_subtasks_component(task, profile: gr.OAuthProfile | None):
    """Return the login button and subtask radio matching the selected task.

    The login button is only visible for the ``leaderboard_gpqa`` task. For
    that task the subtask choices are withheld (``None``) while ``profile``
    is falsy; in every other case the choices come from
    ``constants.SUBTASKS``.

    Returns:
        A ``(gr.LoginButton, gr.Radio)`` tuple.
    """
    is_gpqa = task == "leaderboard_gpqa"
    if is_gpqa and not profile:
        choices = None
    else:
        choices = constants.SUBTASKS.get(task)
    login_button = gr.LoginButton(size="sm", visible=is_gpqa)
    subtask_radio = gr.Radio(
        choices=choices,
        info="Evaluation subtasks to be loaded",
        value=None,
    )
    return login_button, subtask_radio


def update_load_details_component(model_id, subtask):
    """Return the "Load Details" button, enabled only when both inputs are set."""
    # The button is interactive only if a model and a subtask are both truthy.
    ready = bool(model_id and subtask)
    return gr.Button("Load Details", interactive=ready)


def fetch_details_paths(model_id, subtask):
    """Glob the details files available for a model and subtask.

    The dataset id and filename are produced from the templates in
    ``constants``; ``/`` in the model id is replaced by ``__`` before it is
    substituted into the dataset id template.

    Returns:
        Whatever ``glob`` returns for ``<dataset_id>/**/<filename>``.
    """
    dataset_id = constants.DETAILS_DATASET_ID.format(model_name_sanitized=model_id.replace("/", "__"))
    details_file = constants.DETAILS_FILENAME.format(subtask=subtask)
    return glob(f"{dataset_id}/**/{details_file}")


async def load_details_dataframe(model_id, subtask):
    """Load the details of one model for one subtask as a DataFrame.

    Args:
        model_id: Identifier of the model whose details are loaded.
        subtask: Subtask whose details file is looked up.

    Returns:
        A DataFrame sorted by ``doc_id`` and indexed by
        ``(doc_id, model_name)`` (``doc_id`` is also kept as a column), or
        ``None`` when an argument is missing, no details file is found, or
        the file yields no records.
    """
    if not model_id or not subtask:
        return None
    paths = fetch_details_paths(model_id, subtask)
    if not paths:
        return None
    # The lexicographically greatest path is used; presumably that is the
    # most recent results file — TODO confirm against the dataset layout.
    path = max(paths)
    data = await load_jsonlines_file(path)
    if data is None:
        return None
    df = pd.json_normalize(data)
    # An empty payload produces a frame without a "doc_id" column, which
    # would make the sort below raise KeyError; treat it as "nothing loaded".
    if df.empty:
        return None
    # Keep model_name:
    df["model_name"] = model_id
    return df.sort_values("doc_id").set_index("doc_id", drop=False).set_index("model_name", append=True)


async def load_details(model_ids, subtask):
    """Load the details of several models concurrently and stack them.

    Args:
        model_ids: Iterable of model identifiers; ``None`` is treated as an
            empty selection.
        subtask: Subtask whose details are loaded for every model.

    Returns:
        The concatenation of the per-model DataFrames, or ``None`` when no
        model produced any details.
    """
    results = await asyncio.gather(
        *[load_details_dataframe(model_id, subtask) for model_id in model_ids or []]
    )
    # load_details_dataframe returns None when it has nothing to load, and
    # pd.concat raises ValueError if every object passed to it is None, so
    # drop the missing results before concatenating.
    frames = [frame for frame in results if frame is not None]
    if frames:
        return pd.concat(frames)
    return None


def display_details(df, sample_idx, show_only_differences):
    """Render one sample of the details frame as an HTML table.

    The sample is picked by position within the first index level of ``df``
    and then transposed, so the selected rows become the table columns.
    Assumes ``df`` is indexed by ``(doc_id, model_name)`` as built by
    ``load_details_dataframe``.

    Args:
        df: Details frame, or ``None`` (in which case nothing is rendered).
        sample_idx: Position of the sample within ``df.index.levels[0]``.
        show_only_differences: When truthy, hide every row whose values all
            compare equal to the first column.

    Returns:
        The styled table as an HTML string, or ``None`` when ``df`` is None.
    """
    if df is None:
        return

    sample_label = df.index.levels[0][sample_idx]
    table = df.loc[sample_label].T.rename_axis(columns=None)

    # Option: show only differences -> collect the rows to hide.
    if show_only_differences:
        differs = table.ne(table.iloc[:, 0], axis=0).any(axis=1)
        hidden_rows = [label for label in table.index if not differs[label]]
    else:
        hidden_rows = []

    # Fix overflow: let long cell text wrap and share the width evenly.
    cell_style = {
        "selector": "td",
        "props": [("overflow-wrap", "break-word"), ("max-width", "1px")],
    }
    heading_style = {
        "selector": ".col_heading",
        "props": [("width", f"{100 / len(table.columns)}%")],
    }

    styler = table.style.format(escape="html", na_rep="")
    styler = styler.hide(hidden_rows)
    styler = styler.set_table_styles([cell_style, heading_style])
    return styler.to_html()


def update_sample_idx_component(df):
    """Return the "Sample Index" number input bounded by the loaded samples.

    ``display_details`` selects a sample by position within
    ``df.index.levels[0]``, so for a MultiIndex frame the upper bound is the
    number of first-level labels rather than the number of rows (rows are
    repeated once per model). For any other index the row count is used.

    Args:
        df: Details frame, or ``None`` (in which case nothing is returned).

    Returns:
        A visible ``gr.Number`` reset to 0, or ``None`` when ``df`` is None.
    """
    if df is None:
        return None
    index = df.index
    if isinstance(index, pd.MultiIndex):
        n_samples = len(index.levels[0])
    else:
        n_samples = len(df)
    return gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        maximum=n_samples - 1,
        visible=True,
    )


def clear_details():
    """Return the reset values for every details-related component.

    The tuple order is: model_ids, details_dataframe, details_task, subtask,
    load_details_btn, sample_idx.
    """
    model_ids = gr.Dropdown(value=[])
    load_details_btn = gr.Button("Load Details", interactive=False)
    sample_idx = gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        visible=False,
    )
    # details_dataframe, details_task and subtask are all cleared to None.
    return (model_ids, None, None, None, load_details_btn, sample_idx)


def display_loading_message_for_details():
    """Return the HTML placeholder shown while details are being loaded."""
    loading_html = "<h3 style='text-align: center;'>Loading...</h3>"
    return loading_html