import json
import os
from ast import literal_eval
import pandas as pd
import re

from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn
from src.leaderboard.read_evals import get_raw_eval_results
from src.about import (
    nc_tasks,
    nr_tasks,
    lp_tasks,
)

def sanitize_string(input_string):
    # Remove leading and trailing whitespace
    input_string = input_string.strip()
    
    # Remove leading whitespace on each line
    sanitized_string = re.sub(r'(?m)^\s+', '', input_string)
    
    return sanitized_string
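
# Illustrative usage sketch (not part of the app flow): sanitize_string drops
# surrounding whitespace and per-line indentation, e.g. for multi-line form fields.
#
#   >>> sanitize_string("  line one\n    line two  ")
#   'line one\nline two'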
'''
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results"""
    raw_data = get_raw_eval_results(results_path, requests_path)
    all_data_json = [v.to_dict() for v in raw_data]

    df = pd.DataFrame.from_records(all_data_json)
    #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
    #df = df[cols].round(decimals=2)

    # filter out if any of the benchmarks have not been produced
    #df = df[has_no_nan_values(df, benchmark_cols)]
    return raw_data, df
'''

# Function to extract the numerical value before the '±' (or '+') separator
def extract_x(value):
    return float(re.split(r'[±+]', value)[0])

# Function to highlight the highest (or lowest) value based on X
def make_bold(df, cols, ascending):
    df_highlight = df.copy()
    
    def apply_highlight(s):
        # Index label of the best entry in this column (lowest if ascending, else highest)
        if ascending:
            best_idx = s.apply(extract_x).idxmin()
        else:
            best_idx = s.apply(extract_x).idxmax()
        
        return ['font-weight: bold' if i == best_idx else '' for i in s.index]
    
    styler = df_highlight.style.apply(lambda x: apply_highlight(x) if x.name in cols else ['']*len(x), axis=0)
    return styler
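# Usage sketch for make_bold (illustrative values only): each cell is expected to
# look like "mean ± std"; the best mean per column in `cols` is rendered in bold.
#
#   df = pd.DataFrame({"Model": ["a", "b"], "task1": ["0.910 ± 0.010", "0.880 ± 0.020"]})
#   styler = make_bold(df, cols=["task1"], ascending=False)
#   html = styler.to_html()  # render, e.g., inside a Gradio HTML component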

def format_number(num):
    return f"{num:.3f}"

def get_leaderboard_df(EVAL_REQUESTS_PATH, task_type) -> pd.DataFrame:
    """Builds the leaderboard dataframe for a task group from the submitted result JSON files."""
    if task_type in ['Node Classification', 'Entity Classification']:
        ascending = False
        tasks = nc_tasks
        task_type = ['Node Classification', 'Entity Classification']
    elif task_type in ['Node Regression', 'Entity Regression']:
        ascending = True
        tasks = nr_tasks
        task_type = ['Node Regression', 'Entity Regression']
    elif task_type in ['Link Prediction', 'Recommendation']:
        ascending = False
        tasks = lp_tasks
        task_type = ['Link Prediction', 'Recommendation']
    else:
        raise ValueError(f"Unknown task type: {task_type}")

    model_result_filepaths = []
    for root, _, files in os.walk(EVAL_REQUESTS_PATH):
        # Only descend into directories that contain exclusively JSON result files
        if len(files) == 0 or any(not f.endswith(".json") for f in files):
            continue
        for file in files:
            model_result_filepaths.append(os.path.join(root, file))
    
    model_res = []
    for model in model_result_filepaths:
        with open(model) as f:
            out = json.load(f)
            # Keep only submissions whose task matches the requested task group
            if ('task' in out) and (out['task'] in task_type):
                model_res.append(out)

    for model in model_res:
        # 'test'/'valid' are stored as stringified dicts; truncate at the first '}' and parse
        model["test"] = literal_eval(model["test"].split('}')[0]+'}')
        model["valid"] = literal_eval(model["valid"].split('}')[0]+'}')
        #model["params"] = int(model["params"])
        model['submitted_time'] = model['submitted_time'].split('T')[0]
        #model['paper_url'] = '[Link](' + model['paper_url'] + ')'
        #model['github_url'] = '[Link](' + model['github_url'] + ')'

    # Map each benchmark name to its displayed column name (currently an identity mapping)
    name2short_name = {task.value.benchmark: task.value.benchmark for task in tasks}
    for model in model_res:
        model.update({
            name2short_name[i]: (f"{format_number(model['test'][i][0])} ± {format_number(model['test'][i][1])}" if i in model['test'] else '-')
            for i in name2short_name
        })
    columns_to_show = ['model', 'author', 'email', 'paper_url', 'github_url', 'submitted_time'] + list(name2short_name.values())

    # Check if model_res is empty
    if len(model_res) > 0:
        df_res = pd.DataFrame([{col: model[col] for col in columns_to_show} for model in model_res])
    else:
        # Initialize an empty DataFrame with the desired columns
        df_res = pd.DataFrame(columns=columns_to_show)

    #df_res = pd.DataFrame([{col: model[col] for col in columns_to_show} for model in model_res])
    
    # Per-task ranks over the formatted score columns; the table is ordered by their mean
    ranks = df_res[list(name2short_name.values())].rank(ascending=ascending)
    df_res.rename(columns={'model': 'Model', 'author': 'Author', 'email': 'Email', 'paper_url': 'Paper URL', 'github_url': 'Github URL', 'submitted_time': 'Time'}, inplace=True)
    df_res['Model'] = df_res['Model'].replace('Relbench User Study', 'Human Data Scientist')
    df_res['Average Rank⬆️'] = ranks.mean(axis=1)
    df_res.sort_values(by='Average Rank⬆️', ascending=True, inplace=True)
    #df_res = make_bold(df_res, list(name2short_name.values()), ascending = ascending)
    print(df_res)
    return df_res
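
# Usage sketch: the path below is an assumption for illustration; in the app the
# requests path typically comes from src.envs. Each call builds one task group's
# table, already sorted by 'Average Rank⬆️'.
#
#   df_nc = get_leaderboard_df("./eval-queue", task_type="Node Classification")
#   df_lp = get_leaderboard_df("./eval-queue", task_type="Link Prediction")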

def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
    """Creates the different dataframes for the evaluation queues requestes"""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        if ".json" in entry:
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)

            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:
            # this is a folder
            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
            for sub_entry in sub_entries:
                file_path = os.path.join(save_path, entry, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)

                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]
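
# Usage sketch: EVAL_REQUESTS_PATH and EVAL_COLS are assumptions here; in the standard
# leaderboard template they are defined in src.envs and src.display.utils respectively.
#
#   finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)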