|
|
import json |
|
|
import re |
|
|
import os |
|
|
|
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import pickle |
|
|
|
|
|
from urllib.parse import quote |
|
|
from pathlib import Path |
|
|
import re |
|
|
import html |
|
|
from typing import Dict, Any |
|
|
from scipy.stats import sem |
|
|
|
|
|
from utils.constants import (NORM_BASE_SUBMISSION, DATASETS, DIGITS_FOR_VALUES, DIGITS_FOR_ERRORS, |
|
|
DIMENSIONS, COLUMN_ORDER, MODEL_INFO_FILE, RESULTS_DIR) |
|
|
from utils import compute_tools |
|
|
|
|
|
|
|
|
|
|
|
def load_results(folder: str = RESULTS_DIR,
                 items_to_ignore: list = ["__pycache__", "compiled.pkl", ".DS_Store"]
                 ):
    """
    loads results from results folder.

    Every entry of ``folder`` that is not listed in ``items_to_ignore`` is
    treated as a submission directory containing a
    ``results_and_parameters.csv`` file. Each table is extended with the
    columns "# params", "Model" and "Config Settings" and stored under the
    training method ("frozen" or "full_ft") read from its
    "frozen_or_full_ft" column.

    Args:
        folder: folder containing results
        items_to_ignore: list of items in results folder to ignore
            (only read, never modified)

    Returns:
        tuple ``(all_submission_results, all_model_names, all_submissions)``:
            - all_submission_results: ``{"frozen": {...}, "full_ft": {...}}``,
              each mapping a submission name to a dict with the keys
              "results" (DataFrame), "all_backbones" (list) and
              "config_info" (Series)
            - all_model_names: ``{"full_ft": [...], "frozen": [...]}`` with the
              unique backbones seen for each method
            - all_submissions: names of the submissions that were loaded

    Raises:
        KeyError: if a results table has no readable "frozen_or_full_ft"
            value, or that value is neither "frozen" nor "full_ft"
    """
    with open(MODEL_INFO_FILE) as f:
        model_info = json.load(f)
    model_size = model_info["MODEL_SIZE"]
    backbone_names = model_info["BACKBONE_NAMES"]

    # every remaining entry of the folder is a submission directory
    all_submissions = [item for item in os.listdir(folder) if item not in items_to_ignore]

    all_submission_results = {"frozen": {}, "full_ft": {}}
    all_full_ft_model_names = []
    all_frozen_model_names = []

    for submission in all_submissions:
        combined_results = pd.read_csv(f"{folder}/{submission}/results_and_parameters.csv")
        # drop a stray "index" COLUMN; without `columns=` drop() looks for a
        # row labelled "index" and silently does nothing
        combined_results = combined_results.drop(columns=["index"], errors='ignore')
        try:
            frozen_or_full_ft = combined_results["frozen_or_full_ft"][0]
        except KeyError as e:
            # fail loudly: continuing would reuse the previous submission's
            # method (or hit a NameError for the first submission)
            raise KeyError(
                f"{submission}: cannot read 'frozen_or_full_ft' from "
                f"results_and_parameters.csv, columns={list(combined_results.columns)}"
            ) from e
        submission_entry = {}
        all_submission_results[frozen_or_full_ft][submission] = submission_entry

        # display columns derived from the backbone / configuration of each row
        combined_results["# params"] = combined_results.apply(lambda row: model_size[row.backbone], axis=1)
        combined_results["Model"] = combined_results.apply(lambda row: backbone_names[row.backbone], axis=1)
        combined_results["Config Settings"] = combined_results.apply(lambda row: get_config_setting_string(row), axis=1)

        all_backbones = list(set(combined_results["backbone"].tolist()))
        submission_entry["results"] = combined_results
        submission_entry["all_backbones"] = all_backbones

        # the configuration is taken from the first row of the table
        config_settings = combined_results[["early_stop_patience", "decoder", "n_trials", "data_percentages", "batch_size_selection"]].iloc[0]
        config_settings = config_settings.replace("early_stopping_50", "50").replace("n_trials_16", "16").replace("data_100_perc", "100")
        submission_entry["config_info"] = config_settings

        if frozen_or_full_ft == "frozen":
            all_frozen_model_names.extend(all_backbones)
        else:
            all_full_ft_model_names.extend(all_backbones)

    all_frozen_model_names = list(set(all_frozen_model_names))
    all_full_ft_model_names = list(set(all_full_ft_model_names))
    all_model_names = {"full_ft": all_full_ft_model_names, "frozen": all_frozen_model_names}
    return all_submission_results, all_model_names, all_submissions
|
|
|
|
|
|
|
|
|
|
|
def _backbone_row(per_dataset: pd.DataFrame, value_column: str, backbone: str) -> pd.DataFrame:
    """Turn one backbone's long (dataset, value) rows into a single wide row."""
    data = per_dataset.loc[per_dataset["backbone"] == backbone]
    # the dataset names become the column labels; the value column is renamed
    # to the backbone so that, after transposing, it ends up as the row label
    data = data.drop(columns=["backbone"]).rename(columns={value_column: backbone, "dataset": "backbone"})
    data = data.set_index(['backbone']).T.reset_index()
    return data.rename(columns={"index": "backbone"})


def _pivot_per_backbone(values: pd.DataFrame,
                        errors: pd.DataFrame,
                        value_column: str,
                        backbones: list,
                        ) -> tuple:
    """
    Pivot long per-dataset value/error tables into one row per backbone.

    Args:
        values: long table with the columns "backbone", "dataset" and ``value_column``
        errors: long table with the same layout holding the matching errors
        value_column: name of the column holding the numbers
        backbones: backbones to emit rows for, in output order

    Returns:
        tuple ``(values_wide, errors_wide)``; both have the columns
        ``["backbone"] + DATASETS`` with NaN for datasets without results.
        Backbones lacking one of the datasets found in ``values`` are
        reported on stdout and left out of both tables.
    """
    present = set(values["dataset"])
    wide_columns = ["backbone"] + [d for d in DATASETS if d in present]
    values_wide = pd.DataFrame(columns=wide_columns)
    errors_wide = pd.DataFrame(columns=wide_columns)

    for backbone in backbones:
        data = _backbone_row(values, value_column, backbone)
        try:
            data = data.loc[:, wide_columns]
        except KeyError as e:
            # this backbone has no result for at least one included dataset
            print(f'{backbone} {e=}')
            continue
        values_wide = data.copy() if len(values_wide.index) == 0 else pd.concat([values_wide, data], ignore_index=True)

        data_err = _backbone_row(errors, value_column, backbone).loc[:, wide_columns]
        errors_wide = data_err.copy() if len(errors_wide.index) == 0 else pd.concat([errors_wide, data_err], ignore_index=True)

    all_columns = ["backbone"] + DATASETS
    values_wide = values_wide.reset_index(drop=True).rename_axis(mapper=None, axis='columns')
    errors_wide = errors_wide.reset_index(drop=True).rename_axis(mapper=None, axis='columns')
    values_wide = values_wide.reindex(columns=all_columns, fill_value=np.nan)
    errors_wide = errors_wide.reindex(columns=all_columns, fill_value=np.nan)
    return values_wide, errors_wide


def compute_all_iqms(
    all_submission_results: dict,
    benchmark_name: str,
    dataset_group_keys: list = ["backbone", "dataset"],
    overall_group_keys: list = ["backbone"],
    metric: str = "test metric",
) -> Dict:
    """
    - reads combined results from repeated seeds for multiple models
    - computes the raw mean and normalized IQM by dataset for each model
    - computes the normalized overall IQM across the datasets of each dimension

    The "results" table of every submission is modified in place: a
    "partition name" column is (re)written and the normalizer is applied to
    it before the normalized metric column is read.

    Args:
        all_submission_results: dict containing all results
        benchmark_name: name of normalizer file to be used
        dataset_group_keys: grouping for computing dataset IQM (only read)
        overall_group_keys: grouping for computing overall IQM (only read)
        metric: the column containing scores/values in the combined results tables

    Returns:
        dict mapping each submission to a dict with the keys
        "raw_per_dataset", "normalized_per_dataset", "normalized_overall",
        "raw_per_dataset_err", "normalized_per_dataset_err",
        "normalized_overall_err" (DataFrames) and "submission_dimensions"
        (list of dimensions for which every dataset has results)
    """
    output = {}
    for submission in all_submission_results:
        output[submission] = {}
        print(f'\n\n\n{submission=}')
        submission_backbones = all_submission_results[submission]["all_backbones"]

        partition_name = "0.10x train" if "data_10_perc" in submission else "1.00x train"
        submission_results = all_submission_results[submission]["results"]
        # always overwritten, whether or not the column already exists
        submission_results["partition name"] = partition_name

        # raw values: mean and standard error per (backbone, dataset)
        raw_groups = submission_results.groupby(dataset_group_keys)[metric]
        raw_per_dataset = raw_groups.apply(np.mean).to_frame().reset_index()
        raw_per_dataset = raw_per_dataset.drop(columns=["partition name"], errors='ignore')
        raw_per_dataset_err = raw_groups.apply(sem).to_frame().reset_index()
        raw_per_dataset_err = raw_per_dataset_err.drop(columns=["partition name"], errors='ignore')
        raw_per_dataset_final, raw_per_dataset_final_err = _pivot_per_backbone(
            raw_per_dataset, raw_per_dataset_err, metric, submission_backbones)

        # normalize the metric; the returned name is the column read below
        normalizer = compute_tools.load_normalizer(benchmark_name=benchmark_name)
        new_metric = normalizer.normalize_data_frame(df=submission_results, metric=metric)

        # normalized values: IQM and trimmed standard error per (backbone, dataset)
        normalized_groups = submission_results.groupby(dataset_group_keys)[new_metric]
        normalized_per_dataset = normalized_groups.apply(compute_tools.iqm).to_frame().reset_index()
        normalized_per_dataset = normalized_per_dataset.drop(columns=["partition name"], errors='ignore')
        normalized_per_dataset_err = normalized_groups.apply(compute_tools.trimmed_sem).to_frame().reset_index()
        normalized_per_dataset_err = normalized_per_dataset_err.drop(columns=["partition name"], errors='ignore')
        # the error rows are selected with a mask built from the error frame
        # itself rather than from the values frame
        normalized_per_dataset_final, normalized_per_dataset_final_err = _pivot_per_backbone(
            normalized_per_dataset, normalized_per_dataset_err, new_metric, submission_backbones)

        # overall (per dimension) normalized IQM and its standard error
        normalized_overall = pd.DataFrame(columns=["backbone"])
        normalized_overall_std_err = pd.DataFrame(columns=["backbone"])
        submission_dimensions = []
        for dimension in DIMENSIONS:
            dimension_data = submission_results.loc[submission_results["dataset"].isin(DIMENSIONS[dimension])].copy()
            present_datasets = set(dimension_data["dataset"])
            dimension_backbones = sorted(set(dimension_data["backbone"]))
            # backbones that do not cover every dataset present for this dimension
            exclude_backbone = [
                backbone for backbone in dimension_backbones
                if set(dimension_data.loc[dimension_data["backbone"] == backbone, "dataset"]) != present_datasets
            ]

            if all(d in present_datasets for d in DIMENSIONS[dimension]):
                submission_dimensions.append(dimension)

                normalized_iqms_dimension = compute_tools.bootstrap_iqm_aggregate(dimension_data, metric=new_metric)
                series = normalized_iqms_dimension.groupby(overall_group_keys)[new_metric].apply(np.mean)
                normalized_iqms_dimension = series.to_frame().reset_index()
                normalized_iqms_dimension = normalized_iqms_dimension.rename(columns={new_metric: dimension})
                normalized_iqms_dimension.loc[normalized_iqms_dimension["backbone"].isin(exclude_backbone), dimension] = np.nan

                # NOTE(review): the bootstrap is run a second time for the
                # error estimate; kept as is because it is not visible here
                # whether the helper is deterministic - confirm before reusing
                normalized_dimension_std_err = compute_tools.bootstrap_iqm_aggregate(dimension_data, metric=new_metric)
                series = normalized_dimension_std_err.groupby(["backbone"])[new_metric].apply(sem)
                normalized_dimension_std_err = series.to_frame().reset_index()
                normalized_dimension_std_err = normalized_dimension_std_err.drop(columns=["partition name"], errors='ignore')
                normalized_dimension_std_err = normalized_dimension_std_err.rename(columns={new_metric: dimension})
                normalized_dimension_std_err.loc[normalized_dimension_std_err["backbone"].isin(exclude_backbone), dimension] = np.nan
            else:
                # the dimension is incomplete for this submission: report NaN
                normalized_iqms_dimension = pd.DataFrame({
                    "backbone": submission_backbones,
                    dimension: [np.nan] * len(submission_backbones),
                })
                normalized_dimension_std_err = pd.DataFrame({
                    "backbone": submission_backbones,
                    dimension: [np.nan] * len(submission_backbones),
                })

            normalized_iqms_dimension = normalized_iqms_dimension.sort_values(by=['backbone'])
            normalized_dimension_std_err = normalized_dimension_std_err.sort_values(by=['backbone'])
            normalized_overall = normalized_iqms_dimension.copy() if len(normalized_overall.index) == 0 else normalized_overall.merge(normalized_iqms_dimension, how="left", on="backbone")
            normalized_overall_std_err = normalized_dimension_std_err.copy() if len(normalized_overall_std_err.index) == 0 else normalized_overall_std_err.merge(normalized_dimension_std_err, how="left", on="backbone")

        output[submission]["raw_per_dataset"] = raw_per_dataset_final
        output[submission]["normalized_per_dataset"] = normalized_per_dataset_final
        output[submission]["normalized_overall"] = normalized_overall
        output[submission]["raw_per_dataset_err"] = raw_per_dataset_final_err
        output[submission]["normalized_per_dataset_err"] = normalized_per_dataset_final_err
        output[submission]["normalized_overall_err"] = normalized_overall_std_err
        output[submission]["submission_dimensions"] = submission_dimensions
    return output
|
|
|
|
|
|
|
|
|
|
|
def format_values(x):
    """Express a fractional value as a percentage rounded to one decimal."""
    return round(x * 100, 1)
|
|
|
|
|
|
|
|
def format_errors(x):
    """Express a fractional error as a percentage rounded to one decimal."""
    percent = 100 * x
    return round(percent, 1)
|
|
|
|
|
|
|
|
def get_config_setting_string(row) -> str:
    """
    describe the configuration of one results row as text.

    Args:
        row: object exposing the attributes early_stop_patience, decoder,
            n_trials, data_percentages and batch_size_selection

    Returns:
        multi-line summary in which the identifiers "early_stopping_50",
        "n_trials_16" and "data_100_perc" are shortened to "50", "16" and "100"
    """
    summary = f"""
    Early Stop Patience: {row.early_stop_patience} /
    Decoder: {row.decoder} /
    # trials: {row.n_trials} /
    Data : {row.data_percentages}% /
    Batch Size Selection: {row.batch_size_selection}
    """
    # shorten the verbose setting identifiers, in this order
    shortened = (
        ("early_stopping_50", "50"),
        ("n_trials_16", "16"),
        ("data_100_perc", "100"),
    )
    for verbose, short in shortened:
        summary = summary.replace(verbose, short)
    return summary
|
|
|
|
|
|
|
|
def get_overall_performance_table(all_submission_results: dict,
                                  all_iqms: dict
                                  ) -> Dict:
    """
    create tables for 'Aggregated Performance' page.

    For every submission the overall normalized table and its error table
    are copied, annotated with "Model", "# params" and "submission", then
    stacked across submissions, formatted as percentages and reduced to the
    configured column order.

    Args:
        all_submission_results: dict containing all results
        all_iqms: dict containing all computed results

    Returns:
        dict with the keys "normalized" and "normalized_err", each holding
        one DataFrame covering all submissions
    """
    output = {}
    result_type = ["normalized"]
    for value in result_type:
        all_tables = []
        all_tables_err = []
        for submission in all_submission_results:
            submission_data = all_iqms[submission][f"{value}_overall"].copy()
            submission_data["Model"] = "-"
            submission_data["# params"] = "-"
            submission_data["submission"] = submission

            submission_data_err = all_iqms[submission][f"{value}_overall_err"].copy()
            submission_data_err["Config Settings"] = "-"
            submission_data_err["Model"] = "-"
            submission_data_err["# params"] = "-"
            submission_data_err["submission"] = submission

            # copy the display name and size of each backbone into both tables;
            # the lookup is done once per backbone instead of once per cell
            parameters = all_submission_results[submission]["results"]
            for backbone in all_submission_results[submission]["all_backbones"]:
                backbone_rows = parameters.loc[parameters["backbone"] == backbone]
                model_name = backbone_rows["Model"].tolist()[0]
                n_params = backbone_rows["# params"].tolist()[0]
                for table in (submission_data, submission_data_err):
                    is_backbone = table["backbone"] == backbone
                    table.loc[is_backbone, "Model"] = model_name
                    table.loc[is_backbone, "# params"] = n_params
            all_tables.append(submission_data)
            all_tables_err.append(submission_data_err)

            # debug preview; limited to columns that exist so that a change of
            # the configured dimensions cannot turn it into a KeyError
            preview_columns = [c for c in ["Core", "Detection (Object/Instance)", "Model", "submission"]
                               if c in submission_data.columns]
            print(f'\n\n\n {submission} {value} {submission_data[preview_columns].head(50)=}')

        overall_columns = COLUMN_ORDER[value]["overall_table"]
        all_tables = pd.concat(all_tables)
        all_tables_err = pd.concat(all_tables_err)
        all_tables.loc[:, overall_columns] = all_tables[overall_columns].round(DIGITS_FOR_VALUES).apply(lambda series: series.apply(format_values))
        all_tables_err.loc[:, overall_columns] = all_tables_err[overall_columns].round(DIGITS_FOR_ERRORS).apply(lambda series: series.apply(format_errors))

        final_columns = COLUMN_ORDER["all_tables"] + overall_columns
        output[value] = all_tables[final_columns]
        output[f"{value}_err"] = all_tables_err[final_columns]
    return output
|
|
|
|
|
|
|
|
def get_performance_by_dimension_table(all_submission_results: dict,
                                       all_iqms: dict
                                       ) -> Dict:
    """
    build the tables shown on the 'Capabilities' page.

    For each dimension, the per-dataset results of its datasets are joined
    with the dimension's overall result for every submission, annotated with
    "Model", "# params" and "submission", stacked across submissions and
    formatted as percentages. The error tables are built the same way.

    Args:
        all_submission_results: dict containing all results
        all_iqms: dict containing all computed results

    Returns:
        dict with the keys "normalized" (dimension -> DataFrame) and
        "normalized_err" (f"{dimension}_err" -> DataFrame)
    """
    output = {}
    for value in ["normalized"]:
        tables_by_dimension = {}
        err_tables_by_dimension = {}
        for dimension in DIMENSIONS:
            dataset_columns = DIMENSIONS[dimension]
            stacked_values = []
            stacked_errors = []
            for submission in all_submission_results:
                iqms = all_iqms[submission]

                # per-dataset columns joined with the dimension aggregate
                values_table = iqms[f"{value}_per_dataset"][dataset_columns + ["backbone"]].copy()
                dimension_values = iqms[f"{value}_overall"][[dimension] + ["backbone"]].copy()
                values_table = values_table.merge(dimension_values, how="left", on="backbone")

                errors_table = iqms[f"{value}_per_dataset_err"][dataset_columns + ["backbone"]].copy()
                dimension_errors = iqms[f"{value}_overall_err"][[dimension] + ["backbone"]].copy()
                errors_table = errors_table.merge(dimension_errors, how="left", on="backbone")

                for table in (values_table, errors_table):
                    table["Model"] = "-"
                    table["# params"] = "-"
                    table["submission"] = submission

                # fill in display name and size for every backbone of the submission
                parameters = all_submission_results[submission]["results"]
                for backbone in all_submission_results[submission]["all_backbones"]:
                    backbone_rows = parameters.loc[parameters["backbone"] == backbone]
                    model_name = backbone_rows["Model"].tolist()[0]
                    n_params = backbone_rows["# params"].tolist()[0]
                    for table in (values_table, errors_table):
                        selector = table["backbone"] == backbone
                        table.loc[selector, "Model"] = model_name
                        table.loc[selector, "# params"] = n_params

                stacked_values.append(values_table)
                stacked_errors.append(errors_table)

            new_column = f"{dimension}"

            values_all = pd.concat(stacked_values)
            values_all.loc[:, dataset_columns] = values_all[dataset_columns].round(DIGITS_FOR_VALUES).apply(lambda series: series.apply(format_values))
            values_all.loc[:, dimension] = values_all[dimension].round(DIGITS_FOR_VALUES).apply(format_values)
            values_all = values_all[COLUMN_ORDER["all_tables"] + [dimension] + COLUMN_ORDER[value]["dimension_tables"] + dataset_columns]
            tables_by_dimension[dimension] = values_all.rename(columns={dimension: new_column})

            errors_all = pd.concat(stacked_errors)
            errors_all.loc[:, dataset_columns] = errors_all[dataset_columns].round(DIGITS_FOR_ERRORS).apply(lambda series: series.apply(format_errors))
            errors_all.loc[:, dimension] = errors_all[dimension].round(DIGITS_FOR_ERRORS).apply(format_errors)
            errors_all = errors_all[COLUMN_ORDER["all_tables"] + [dimension] + COLUMN_ORDER[value]["dimension_tables"] + dataset_columns]
            err_tables_by_dimension[f"{dimension}_err"] = errors_all.rename(columns={dimension: new_column})

        output[value] = tables_by_dimension
        output[f"{value}_err"] = err_tables_by_dimension
    return output
|
|
|
|
|
|
|
|
def get_datasets_tables(all_submission_results: dict,
                        all_iqms: dict
                        ) -> Dict:
    """
    creates tables for dataset tab.

    For both result types ("normalized" and "raw") and every dataset, the
    per-dataset column of each submission is annotated with "Model",
    "# params" and "submission", stacked across submissions, formatted as a
    percentage and reduced to the configured column order. The value column
    is named "IQM" for normalized results and "Mean" for raw results.

    Args:
        all_submission_results: dict containing all results
        all_iqms: dict containing all computed results

    Returns:
        dict with the keys "normalized", "normalized_err", "raw" and
        "raw_err", each mapping a dataset name to its DataFrame
    """
    output = {}
    result_type = ["normalized", "raw"]
    for value in result_type:
        # name of the value column in the finished tables
        column = "IQM" if value == "normalized" else "Mean"
        final_columns = COLUMN_ORDER["all_tables"] + COLUMN_ORDER[value]["dataset_tables"]
        all_tables = {}
        all_tables_err = {}
        for dataset in DATASETS:
            dataset_tables = []
            dataset_tables_err = []
            for submission in all_submission_results:
                submission_data = all_iqms[submission][f"{value}_per_dataset"][["backbone", dataset]].copy()
                submission_data["Model"] = "-"
                submission_data["# params"] = "-"
                submission_data["submission"] = submission

                submission_data_err = all_iqms[submission][f"{value}_per_dataset_err"][["backbone", dataset]].copy()
                submission_data_err["Model"] = "-"
                submission_data_err["# params"] = "-"
                submission_data_err["submission"] = submission

                # copy the display name and size of each backbone into both tables
                parameters = all_submission_results[submission]["results"]
                for backbone in all_submission_results[submission]["all_backbones"]:
                    backbone_rows = parameters.loc[parameters["backbone"] == backbone]
                    model_name = backbone_rows["Model"].tolist()[0]
                    n_params = backbone_rows["# params"].tolist()[0]
                    for table in (submission_data, submission_data_err):
                        is_backbone = table["backbone"] == backbone
                        table.loc[is_backbone, "Model"] = model_name
                        table.loc[is_backbone, "# params"] = n_params

                # renamed once per table, outside the backbone loop, so the
                # value column exists even when the backbone list is empty
                submission_data = submission_data.rename(columns={dataset: column})
                submission_data_err = submission_data_err.rename(columns={dataset: column})

                dataset_tables.append(submission_data)
                dataset_tables_err.append(submission_data_err)

            dataset_tables = pd.concat(dataset_tables)
            dataset_tables.loc[:, column] = dataset_tables[column].round(DIGITS_FOR_VALUES).apply(format_values)
            all_tables[dataset] = dataset_tables[final_columns]

            dataset_tables_err = pd.concat(dataset_tables_err)
            dataset_tables_err.loc[:, column] = dataset_tables_err[column].round(DIGITS_FOR_ERRORS).apply(format_errors)
            all_tables_err[dataset] = dataset_tables_err[final_columns]

        output[value] = all_tables
        output[f"{value}_err"] = all_tables_err
    return output
|
|
|
|
|
|
|
|
def get_submission_tables(all_submission_results: dict):
    """
    collect the configuration of every submission into one table.

    Args:
        all_submission_results: dict with the keys "frozen" and "full_ft",
            each mapping a submission name to a dict holding "config_info"

    Returns:
        DataFrame with one row per submission (frozen submissions first),
        restricted to the columns listed in COLUMN_ORDER["submission_info"]
    """
    rows = []
    for method in ("frozen", "full_ft"):
        for sub, entry in all_submission_results[method].items():
            # the stored Series becomes a one-row frame
            row = entry["config_info"].to_frame().T
            row["submission"] = sub
            row["backbone method"] = method
            rows.append(row)
    combined = pd.concat(rows)
    return combined[COLUMN_ORDER["submission_info"]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # read every submission found in the results folder
    all_submission_results, all_model_names, all_submissions = load_results(folder=RESULTS_DIR)

    # build the normalizer from the submissions designated as the base
    norm_base_results = pd.concat([
        all_submission_results[method][sub]["results"].copy()
        for method in NORM_BASE_SUBMISSION
        for sub in NORM_BASE_SUBMISSION[method]
    ])
    benchmark_name = "leaderboard_combined"
    compute_tools.make_normalizer(
        norm_base_results.reset_index(),
        metrics=("test metric",),
        benchmark_name=benchmark_name,
    )

    # compute the tables of every page, separately for each training method
    overall_performance_tables = {}
    performance_by_dimension_tables = {}
    datasets_tables = {}
    for method in ["full_ft", "frozen"]:
        method_results = all_submission_results[method]
        method_iqms = compute_all_iqms(
            all_submission_results=method_results,
            benchmark_name=benchmark_name,
        )

        overall_performance_tables[method] = get_overall_performance_table(
            all_submission_results=method_results,
            all_iqms=method_iqms,
        )
        performance_by_dimension_tables[method] = get_performance_by_dimension_table(
            all_submission_results=method_results,
            all_iqms=method_iqms,
        )
        datasets_tables[method] = get_datasets_tables(
            all_submission_results=method_results,
            all_iqms=method_iqms,
        )

    submission_info_table = get_submission_tables(all_submission_results=all_submission_results)

    # persist everything in a single pickle inside the results folder
    compiled_results = {
        "overall_performance_tables": overall_performance_tables,
        "performance_by_dimension_tables": performance_by_dimension_tables,
        "datasets_tables": datasets_tables,
        "submission_info_table": submission_info_table,
    }
    with open(f'{RESULTS_DIR}/compiled.pkl', 'wb') as handle:
        pickle.dump(compiled_results, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|