Spaces:
Runtime error
Runtime error
File size: 4,772 Bytes
b06387f 5a94e04 b06387f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
#!/usr/bin/env python3
import os
import sys
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage
from src.backend.envs import Tasks, EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND, DEVICE, LIMIT, Task
from src.envs import QUEUE_REPO, RESULTS_REPO, API
from src.utils import my_snapshot_download
def find_json_files(json_path):
    """Recursively collect the paths of all ``.json`` files under *json_path*.

    Args:
        json_path: Root directory to search.

    Returns:
        A sorted list of paths, each built by joining the file name onto the
        directory it was found in. The list is empty when nothing matches.
    """
    matches = [
        os.path.join(root, file_name)
        for root, _dirs, files in os.walk(json_path)
        for file_name in files
        if file_name.endswith(".json")
    ]
    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Callers that keep "the last file seen" per key need a stable order to
    # produce reproducible output, so return the paths sorted.
    return sorted(matches)
# Fetch the "main" revision of the results and queue dataset repos into their
# local backend directories (my_snapshot_download is a project helper;
# presumably it mirrors the repo contents locally -- confirm in src.utils).
_snapshot_options = dict(revision="main", repo_type="dataset", max_workers=60)
my_snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH_BACKEND, **_snapshot_options)
my_snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND, **_snapshot_options)

# Every JSON file found under each local directory.
result_path_lst = find_json_files(EVAL_RESULTS_PATH_BACKEND)
request_path_lst = find_json_files(EVAL_REQUESTS_PATH_BACKEND)
# Index every request record by the value of its "model" field. When several
# request files name the same model, the one read last wins.
model_name_to_model_map = {}
for request_path in request_path_lst:
    with open(request_path, 'r') as request_file:
        request_record = json.load(request_file)
    model_name_to_model_map[request_record["model"]] = request_record
def _keep_metric(dataset_name, metric_name):
    """Return True when a dataset/metric pair should be included in the analysis.

    Only metric keys of the form ``"<name>,<suffix>"`` are considered;
    ``*_stderr`` and ``f1`` entries are always dropped. The remaining rules
    keep at most one family of metric per dataset.
    """
    if ',' not in metric_name or '_stderr' in metric_name or 'f1' in metric_name:
        return False

    # Datasets that are excluded entirely.
    if any(tag in dataset_name for tag in ('nq_open', 'triviaqa', 'squad', 'fever')):
        return False

    # Metrics that are excluded wherever they appear.
    if 'correctness,' in metric_name or 'em,' in metric_name:
        return False

    # Per-dataset / per-family restrictions: keep only one representative.
    if 'selfcheck' in dataset_name and 'max' not in metric_name:
        return False
    if 'bertscore' in metric_name and 'precision' not in metric_name:
        return False
    if 'rouge' in metric_name and 'rougeL' not in metric_name:
        return False
    if 'ifeval' in dataset_name and 'prompt_level_strict_acc' not in metric_name:
        return False

    return True


# (model, dataset, metric) -> value, and model -> {(dataset, metric): value}.
model_dataset_metric_to_result_map = {}
data_map = {}

for path in result_path_lst:
    with open(path, 'r') as f:
        data = json.load(f)

    model_name = data["config"]["model_name"]

    # A result file may have no matching request record, or a record without
    # a like count. Treat both as 0 likes (i.e. skip the model) instead of
    # aborting the whole run with a KeyError.
    likes = (model_name_to_model_map.get(model_name) or {}).get("likes") or 0
    if likes <= 256:
        continue

    for dataset_name, results_dict in data["results"].items():
        for metric_name, value in results_dict.items():
            if not _keep_metric(dataset_name, metric_name):
                continue

            # Rouge values are divided by 100 -- presumably they are reported
            # on a 0-100 scale while the other metrics are 0-1 (confirm).
            if 'rouge' in metric_name:
                value /= 100.0

            # Drop the ",<suffix>" part of the metric key.
            sanitised_metric_name = metric_name.split(',')[0]

            model_dataset_metric_to_result_map[(model_name, dataset_name, sanitised_metric_name)] = value
            data_map.setdefault(model_name, {})[(dataset_name, sanitised_metric_name)] = value

            print('model_name', model_name, 'dataset_name', dataset_name, 'metric_name', metric_name, 'value', value)
# Snapshot the model names first so data_map can be pruned in place, then
# discard every model that has fewer than 8 recorded (dataset, metric) values.
model_name_lst = list(data_map)
for m in model_name_lst:
    if len(data_map[m]) >= 8:
        continue
    data_map.pop(m)
# One row per model, one column per (dataset, metric) pair.
df = pd.DataFrame.from_dict(data_map, orient='index')

# Keep an untouched copy: its missing cells define the heatmap mask below.
o_df = df.copy(deep=True)

print(df)

# The clustering cannot handle NaN or infinite values, so replace them.
df.replace([np.inf, -np.inf], np.nan, inplace=True)  # Replace infinities with NaN
df.fillna(0, inplace=True)  # Replace NaN with 0 (or use another imputation strategy)

# Only needed for the optional min-max scaling below, which is currently disabled.
from sklearn.preprocessing import MinMaxScaler

# scaler = MinMaxScaler()
# df = pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)

sns.set_context("notebook", font_scale=1.0)

# fig = sns.clustermap(df, method='average', metric='cosine', cmap='coolwarm', figsize=(16, 12), annot=True)
# Cells that were missing in the original data are masked out of the heatmap
# even though they were filled with 0 for the clustering.
fig = sns.clustermap(df, method='ward', metric='euclidean', cmap='coolwarm', figsize=(16, 12), annot=True, mask=o_df.isnull())

# Keep row labels horizontal and column labels vertical.
plt.setp(fig.ax_heatmap.get_yticklabels(), rotation=0)
plt.setp(fig.ax_heatmap.get_xticklabels(), rotation=90)

# Save the clustermap to file; create the output directory first so that a
# fresh checkout without a plots/ folder does not fail with FileNotFoundError.
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/clustermap.pdf')
fig.savefig('plots/clustermap.png')
|