import json
import os
from ast import literal_eval
import pandas as pd
import re
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn
from src.leaderboard.read_evals import get_raw_eval_results
from src.about import (
nc_tasks,
nr_tasks,
lp_tasks,
)
def sanitize_string(input_string):
# Remove leading and trailing whitespace
input_string = input_string.strip()
# Remove leading whitespace on each line
sanitized_string = re.sub(r'(?m)^\s+', '', input_string)
return sanitized_string
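# Example (illustrative): sanitize_string("  line one\n    line two ") returns "line one\nline two"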
'''
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
"""Creates a dataframe from all the individual experiment results"""
raw_data = get_raw_eval_results(results_path, requests_path)
all_data_json = [v.to_dict() for v in raw_data]
df = pd.DataFrame.from_records(all_data_json)
#df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
#df = df[cols].round(decimals=2)
# filter out if any of the benchmarks have not been produced
#df = df[has_no_nan_values(df, benchmark_cols)]
return raw_data, df
'''
# Function to extract the numerical part before the '±' (or '+') separator
def extract_x(value):
    return float(re.split(r'[±+]', value)[0])
# Function to highlight the best value per column (lowest if ascending, highest otherwise)
def make_bold(df, cols, ascending):
    df_highlight = df.copy()

    def apply_highlight(s):
        # idxmin/idxmax return index labels, so compare against s.index rather than positions
        if ascending:
            best_idx = s.apply(extract_x).idxmin()
        else:
            best_idx = s.apply(extract_x).idxmax()
        return ['font-weight: bold' if i == best_idx else '' for i in s.index]

    styler = df_highlight.style.apply(lambda x: apply_highlight(x) if x.name in cols else [''] * len(x), axis=0)
    return styler
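# Usage note (illustrative): make_bold(df_res, list(name2short_name.values()), ascending=ascending)
# would return a pandas Styler with the best score per task column in bold; the corresponding
# call inside get_leaderboard_df below is currently commented out.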
def format_number(num):
return f"{num:.3f}"
def get_leaderboard_df(EVAL_REQUESTS_PATH, task_type) -> pd.DataFrame:
    """Builds the leaderboard dataframe for one task family from the submitted result files."""
    if task_type in ['Node Classification', 'Entity Classification']:
        ascending = False
        tasks = nc_tasks
        task_type = ['Node Classification', 'Entity Classification']
    elif task_type in ['Node Regression', 'Entity Regression']:
        ascending = True
        tasks = nr_tasks
        task_type = ['Node Regression', 'Entity Regression']
    elif task_type in ['Link Prediction', 'Recommendation']:
        ascending = False
        tasks = lp_tasks
        task_type = ['Link Prediction', 'Recommendation']
    else:
        raise ValueError(f"Unsupported task type: {task_type}")
    model_result_filepaths = []
    for root, _, files in os.walk(EVAL_REQUESTS_PATH):
        # Skip empty folders and folders containing non-JSON files
        if len(files) == 0 or any(not f.endswith(".json") for f in files):
            continue
        for file in files:
            model_result_filepaths.append(os.path.join(root, file))

    # Keep only submissions whose task belongs to the requested task family
    model_res = []
    for model in model_result_filepaths:
        with open(model) as f:
            out = json.load(f)
        if ('task' in out) and (out['task'] in task_type):
            model_res.append(out)
for model in model_res:
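        # Each submission stores its 'test'/'valid' metric dicts as strings; keep everything up to
        # the first closing brace and parse it back into a dict with literal_eval.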
model["test"] = literal_eval(model["test"].split('}')[0]+'}')
model["valid"] = literal_eval(model["valid"].split('}')[0]+'}')
#model["params"] = int(model["params"])
model['submitted_time'] = model['submitted_time'].split('T')[0]
#model['paper_url'] = '[Link](' + model['paper_url'] + ')'
#model['github_url'] = '[Link](' + model['github_url'] + ')'
name2short_name = {task.value.benchmark: task.value.benchmark for task in tasks}
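    # One leaderboard column per task: "mean ± std" on the test split, or '-' when the
    # submission did not report that task.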
for model in model_res:
model.update({
name2short_name[i]: (f"{format_number(model['test'][i][0])} ± {format_number(model['test'][i][1])}" if i in model['test'] else '-')
for i in name2short_name
})
columns_to_show = ['model', 'author', 'email', 'paper_url', 'github_url', 'submitted_time'] + list(name2short_name.values())
# Check if model_res is empty
if len(model_res) > 0:
df_res = pd.DataFrame([{col: model[col] for col in columns_to_show} for model in model_res])
else:
# Initialize an empty DataFrame with the desired columns
df_res = pd.DataFrame(columns=columns_to_show)
#df_res = pd.DataFrame([{col: model[col] for col in columns_to_show} for model in model_res])
    # Rank on the numeric mean parsed from each "mean ± std" cell; '-' (task not evaluated)
    # is excluded from both the ranking and the average below.
    score_values = df_res[list(name2short_name.values())].apply(
        lambda col: col.map(lambda v: float(str(v).split('±')[0]) if v != '-' else float('nan'))
    )
    ranks = score_values.rank(ascending=ascending)
df_res.rename(columns={'model': 'Model', 'author': 'Author', 'email': 'Email', 'paper_url': 'Paper URL', 'github_url': 'Github URL', 'submitted_time': 'Time'}, inplace=True)
    df_res['Model'] = df_res['Model'].replace('Relbench User Study', 'Human Data Scientist')
df_res['Average Rank⬆️'] = ranks.mean(axis=1)
df_res.sort_values(by='Average Rank⬆️', ascending=True, inplace=True)
#df_res = make_bold(df_res, list(name2short_name.values()), ascending = ascending)
print(df_res)
return df_res
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
"""Creates the different dataframes for the evaluation queues requestes"""
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
all_evals = []
for entry in entries:
if ".json" in entry:
file_path = os.path.join(save_path, entry)
with open(file_path) as fp:
data = json.load(fp)
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
all_evals.append(data)
elif ".md" not in entry:
# this is a folder
            sub_entries = [e for e in os.listdir(os.path.join(save_path, entry)) if not e.startswith(".")]
for sub_entry in sub_entries:
file_path = os.path.join(save_path, entry, sub_entry)
with open(file_path) as fp:
data = json.load(fp)
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
all_evals.append(data)
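    # Split the collected submissions by status into the pending, running, and finished queues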
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
df_running = pd.DataFrame.from_records(running_list, columns=cols)
df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
return df_finished[cols], df_running[cols], df_pending[cols]
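

if __name__ == "__main__":
    # Illustrative local smoke test only; not used by the Space at runtime. The folder name
    # below is an assumption for a local checkout of the submissions, not a path defined by
    # this repository.
    example_requests_path = "eval-queue"  # hypothetical directory of submission JSON files
    leaderboard = get_leaderboard_df(example_requests_path, "Node Classification")
    print(leaderboard.head())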