ribesstefano
committed on
Commit
•
aa57971
1
Parent(s):
251060c
Updated plotting script (but still broken, use notebook instead)
Browse files
- src/plot_experiment_results.py +105 -87
src/plot_experiment_results.py
CHANGED
@@ -72,7 +72,10 @@ def plot_training_curves(df, split_type, stage='test', multimodels=False, groupb
|
|
72 |
def plot_performance_metrics(df_cv, df_test, title=None):
|
73 |
|
74 |
# Extract and prepare CV data
|
75 |
-
|
|
|
|
|
|
|
76 |
cv_data = cv_data.melt(id_vars=['model_type', 'fold', 'split_type'], var_name='Metric', value_name='Score')
|
77 |
cv_data['Metric'] = cv_data['Metric'].replace({
|
78 |
'val_acc': 'Validation Accuracy',
|
@@ -170,7 +173,7 @@ def plot_performance_metrics(df_cv, df_test, title=None):
|
|
170 |
plt.savefig(f'plots/{title}.pdf', bbox_inches='tight')
|
171 |
|
172 |
|
173 |
-
def plot_ablation_study(report):
|
174 |
# Define the ablation study combinations
|
175 |
ablation_study_combinations = [
|
176 |
'disabled smiles',
|
@@ -283,7 +286,7 @@ def plot_ablation_study(report):
|
|
283 |
x = 0.4 # p.get_height() - p.get_height() / 2
|
284 |
plt.annotate(value, (x, y), ha='center', va='center', color='black', fontsize=10, alpha=0.8)
|
285 |
|
286 |
-
plt.savefig(f'plots/
|
287 |
|
288 |
|
289 |
def plot_majority_voting_performance(df):
|
@@ -300,93 +303,108 @@ def main():
|
|
300 |
cv_n_folds = 5
|
301 |
|
302 |
active_name = active_col.replace(' ', '_').replace('(', '').replace(')', '').replace(',', '')
|
303 |
-
|
304 |
|
305 |
# Load the data
|
306 |
-
reports = {
|
307 |
-
|
308 |
-
|
309 |
-
pd.read_csv(f'reports/cv_report_{
|
310 |
-
pd.read_csv(f'reports/cv_report_{
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
pd.read_csv(f'reports/test_report_{
|
315 |
-
pd.read_csv(f'reports/test_report_{
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
pd.read_csv(f'reports/
|
320 |
-
pd.read_csv(f'reports/
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
pd.read_csv(f'reports/
|
325 |
-
pd.read_csv(f'reports/
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
-
|
335 |
-
|
336 |
-
for i in range(n_models_for_test):
|
337 |
-
logs_dir = f'logs_{report_base_name}_{split_type}_best_model_n{i}'
|
338 |
-
metrics = pd.read_csv(f'logs/{logs_dir}/{logs_dir}/metrics.csv')
|
339 |
-
metrics['model_id'] = i
|
340 |
-
# Rename 'val_' columns to 'test_' columns
|
341 |
-
metrics = metrics.rename(columns={'val_loss': 'test_loss', 'val_acc': 'test_acc', 'val_roc_auc': 'test_roc_auc'})
|
342 |
-
# plot_training_curves(metrics, f'{split_type}_best_model_n{i}')
|
343 |
-
split_metrics.append(metrics)
|
344 |
-
plot_training_curves(pd.concat(split_metrics), f'{split_type}_best_model', multimodels=True)
|
345 |
-
|
346 |
-
split_metrics_cv = []
|
347 |
-
for i in range(cv_n_folds):
|
348 |
-
# logs_dir = f'logs_{report_base_name}_{split_type}_best_model_n{i}'
|
349 |
-
logs_dir = f'logs_{report_base_name}_{split_type}_{split_type}_cv_model_fold{i}'
|
350 |
-
metrics = pd.read_csv(f'logs/{logs_dir}/{logs_dir}/metrics.csv')
|
351 |
-
metrics['fold'] = i
|
352 |
-
# plot_training_curves(metrics, f'{split_type}_cv_model_fold{i}', stage='val')
|
353 |
-
split_metrics_cv.append(metrics)
|
354 |
-
plot_training_curves(pd.concat(split_metrics_cv), f'{split_type}_cv_model', stage='val', multimodels=True, groupby='fold')
|
355 |
-
|
356 |
-
plot_performance_metrics(
|
357 |
-
reports['cv_train'],
|
358 |
-
reports['test'],
|
359 |
-
title=f'mean_performance-best_models_as_test',
|
360 |
-
)
|
361 |
-
|
362 |
-
plot_performance_metrics(
|
363 |
-
reports['cv_train'],
|
364 |
-
reports['cv_train'],
|
365 |
-
title=f'mean_performance-cv_models_as_test',
|
366 |
-
)
|
367 |
-
|
368 |
-
plot_performance_metrics(
|
369 |
-
reports['cv_train'],
|
370 |
-
reports['majority_vote'][reports['majority_vote']['cv_models'].isna()],
|
371 |
-
title=f'majority_vote_performance-best_models_as_test',
|
372 |
-
)
|
373 |
-
|
374 |
-
plot_performance_metrics(
|
375 |
-
reports['cv_train'],
|
376 |
-
reports['majority_vote'][reports['majority_vote']['cv_models'] == True],
|
377 |
-
title=f'majority_vote_performance-cv_models_as_test',
|
378 |
-
)
|
379 |
-
|
380 |
-
# plot_majority_voting_performance(reports['majority_vote'])
|
381 |
-
|
382 |
-
reports['test']['disabled_embeddings'] = pd.NA
|
383 |
-
plot_ablation_study(pd.concat([
|
384 |
-
reports['ablation'],
|
385 |
-
reports['test'],
|
386 |
-
]))
|
387 |
-
|
388 |
-
# # Plot hyperparameter optimization results to markdown
|
389 |
-
# print(reports['hparam'][['split_type', 'hidden_dim', 'learning_rate', 'dropout', 'use_smote', 'smote_k_neighbors']].to_markdown(index=False))
|
390 |
|
391 |
|
392 |
if __name__ == '__main__':
|
|
|
72 |
def plot_performance_metrics(df_cv, df_test, title=None):
|
73 |
|
74 |
# Extract and prepare CV data
|
75 |
+
cols = ['model_type', 'fold', 'val_acc', 'val_roc_auc', 'split_type']
|
76 |
+
if 'test_acc' in df_cv.columns:
|
77 |
+
cols.extend(['test_acc', 'test_roc_auc'])
|
78 |
+
cv_data = df_cv[cols]
|
79 |
cv_data = cv_data.melt(id_vars=['model_type', 'fold', 'split_type'], var_name='Metric', value_name='Score')
|
80 |
cv_data['Metric'] = cv_data['Metric'].replace({
|
81 |
'val_acc': 'Validation Accuracy',
|
|
|
173 |
plt.savefig(f'plots/{title}.pdf', bbox_inches='tight')
|
174 |
|
175 |
|
176 |
+
def plot_ablation_study(report, title=''):
|
177 |
# Define the ablation study combinations
|
178 |
ablation_study_combinations = [
|
179 |
'disabled smiles',
|
|
|
286 |
x = 0.4 # p.get_height() - p.get_height() / 2
|
287 |
plt.annotate(value, (x, y), ha='center', va='center', color='black', fontsize=10, alpha=0.8)
|
288 |
|
289 |
+
plt.savefig(f'plots/{title}{group}.pdf', bbox_inches='tight')
|
290 |
|
291 |
|
292 |
def plot_majority_voting_performance(df):
|
|
|
303 |
cv_n_folds = 5
|
304 |
|
305 |
active_name = active_col.replace(' ', '_').replace('(', '').replace(')', '').replace(',', '')
|
306 |
+
dataset_info = f'{active_name}_test_split_{test_split}'
|
307 |
|
308 |
# Load the data
|
309 |
+
reports = {}
|
310 |
+
for experiment in ['', 'xgboost_', 'cellsonehot_', 'aminoacidcnt_']:
|
311 |
+
reports[f'{experiment}cv_train'] = pd.concat([
|
312 |
+
pd.read_csv(f'reports/{experiment}cv_report_{dataset_info}_standard.csv'),
|
313 |
+
pd.read_csv(f'reports/{experiment}cv_report_{dataset_info}_target.csv'),
|
314 |
+
pd.read_csv(f'reports/{experiment}cv_report_{dataset_info}_similarity.csv'),
|
315 |
+
])
|
316 |
+
reports[f'{experiment}test'] = pd.concat([
|
317 |
+
pd.read_csv(f'reports/{experiment}test_report_{dataset_info}_standard.csv'),
|
318 |
+
pd.read_csv(f'reports/{experiment}test_report_{dataset_info}_target.csv'),
|
319 |
+
pd.read_csv(f'reports/{experiment}test_report_{dataset_info}_similarity.csv'),
|
320 |
+
])
|
321 |
+
reports[f'{experiment}hparam'] = pd.concat([
|
322 |
+
pd.read_csv(f'reports/{experiment}hparam_report_{dataset_info}_standard.csv'),
|
323 |
+
pd.read_csv(f'reports/{experiment}hparam_report_{dataset_info}_target.csv'),
|
324 |
+
pd.read_csv(f'reports/{experiment}hparam_report_{dataset_info}_similarity.csv'),
|
325 |
+
])
|
326 |
+
reports[f'{experiment}majority_vote'] = pd.concat([
|
327 |
+
pd.read_csv(f'reports/{experiment}majority_vote_report_{dataset_info}_standard.csv'),
|
328 |
+
pd.read_csv(f'reports/{experiment}majority_vote_report_{dataset_info}_target.csv'),
|
329 |
+
pd.read_csv(f'reports/{experiment}majority_vote_report_{dataset_info}_similarity.csv'),
|
330 |
+
])
|
331 |
+
if experiment != 'xgboost_':
|
332 |
+
reports[f'{experiment}ablation'] = pd.concat([
|
333 |
+
pd.read_csv(f'reports/{experiment}ablation_report_{dataset_info}_standard.csv'),
|
334 |
+
pd.read_csv(f'reports/{experiment}ablation_report_{dataset_info}_target.csv'),
|
335 |
+
pd.read_csv(f'reports/{experiment}ablation_report_{dataset_info}_similarity.csv'),
|
336 |
+
])
|
337 |
+
|
338 |
+
for experiment in ['', 'xgboost_', 'cellsonehot_', 'aminoacidcnt_']:
|
339 |
+
print('=' * 80)
|
340 |
+
print(f'Experiment: {experiment}')
|
341 |
+
print('=' * 80)
|
342 |
+
|
343 |
+
# Plot training curves
|
344 |
+
for split_type in ['standard', 'similarity', 'target']:
|
345 |
+
# Skip XGBoost: we don't have its training curves
|
346 |
+
if experiment != 'xgboost_':
|
347 |
+
# Plot training curves for the best models
|
348 |
+
split_metrics = []
|
349 |
+
for i in range(n_models_for_test):
|
350 |
+
metrics_dir = f'best_model_n{i}_{experiment}{split_type}_{dataset_info}'
|
351 |
+
metrics = pd.read_csv(f'logs/{metrics_dir}/{metrics_dir}/metrics.csv')
|
352 |
+
metrics['model_id'] = i
|
353 |
+
# Rename 'val_' columns to 'test_' columns
|
354 |
+
metrics = metrics.rename(columns={'val_loss': 'test_loss', 'val_acc': 'test_acc', 'val_roc_auc': 'test_roc_auc'})
|
355 |
+
split_metrics.append(metrics)
|
356 |
+
plot_training_curves(pd.concat(split_metrics), f'{experiment}{split_type}_best_model', multimodels=True)
|
357 |
+
|
358 |
+
# Plot training curves for the CV models
|
359 |
+
split_metrics_cv = []
|
360 |
+
for i in range(cv_n_folds):
|
361 |
+
metrics_dir = f'cv_model_{experiment}{split_type}_{dataset_info}_fold{i}'
|
362 |
+
metrics = pd.read_csv(f'logs/{metrics_dir}/{metrics_dir}/metrics.csv')
|
363 |
+
metrics['fold'] = i
|
364 |
+
split_metrics_cv.append(metrics)
|
365 |
+
plot_training_curves(pd.concat(split_metrics_cv), f'{experiment}{split_type}_cv_model', stage='val', multimodels=True, groupby='fold')
|
366 |
+
|
367 |
+
if experiment != 'xgboost_':
|
368 |
+
# Skip XGBoost: we don't have test data for its CV models
|
369 |
+
plot_performance_metrics(
|
370 |
+
reports[f'{experiment}cv_train'],
|
371 |
+
reports[f'{experiment}cv_train'],
|
372 |
+
title=f'{experiment}mean_performance-cv_models_as_test',
|
373 |
+
)
|
374 |
+
plot_performance_metrics(
|
375 |
+
reports[f'{experiment}cv_train'],
|
376 |
+
reports[f'{experiment}majority_vote'][reports[f'{experiment}majority_vote']['cv_models'] == True],
|
377 |
+
title=f'{experiment}majority_vote_performance-cv_models_as_test',
|
378 |
+
)
|
379 |
+
# Skip XGBoost: we don't have its ablation study
|
380 |
+
reports[f'{experiment}test']['disabled_embeddings'] = pd.NA
|
381 |
+
plot_ablation_study(
|
382 |
+
pd.concat([
|
383 |
+
reports[f'{experiment}ablation'],
|
384 |
+
reports[f'{experiment}test'],
|
385 |
+
]),
|
386 |
+
title=f'{experiment}ablation_study_',
|
387 |
+
)
|
388 |
+
|
389 |
+
plot_performance_metrics(
|
390 |
+
reports[f'{experiment}cv_train'],
|
391 |
+
reports[f'{experiment}test'],
|
392 |
+
title=f'{experiment}mean_performance-best_models_as_test',
|
393 |
+
)
|
394 |
+
|
395 |
+
#
|
396 |
+
if experiment == 'xgboost_':
|
397 |
+
df_test = reports[f'{experiment}majority_vote']
|
398 |
+
else:
|
399 |
+
df_test = reports[f'{experiment}majority_vote'][reports[f'{experiment}majority_vote']['cv_models'].isna()]
|
400 |
+
plot_performance_metrics(
|
401 |
+
reports[f'{experiment}cv_train'],
|
402 |
+
df_test,
|
403 |
+
title=f'{experiment}majority_vote_performance-best_models_as_test',
|
404 |
+
)
|
405 |
|
406 |
+
# # Plot hyperparameter optimization results to markdown
|
407 |
+
# print(reports['hparam'][['split_type', 'hidden_dim', 'learning_rate', 'dropout', 'use_smote', 'smote_k_neighbors']].to_markdown(index=False))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
408 |
|
409 |
|
410 |
if __name__ == '__main__':
|