import argparse
import json
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score


# Compute ranking and threshold-based metrics (AUC, precision, recall, F1)
# from the model's predicted probabilities.
def compute_additional_metrics(X, Y, model):
    predictions = model.predict(X).flatten()
    # Convert probabilities to class labels (0 or 1) at a 0.5 threshold
    predictions_binary = (predictions > 0.5).astype(int)
    auc = roc_auc_score(Y, predictions)
    precision = precision_score(Y, predictions_binary)
    recall = recall_score(Y, predictions_binary)
    f1 = f1_score(Y, predictions_binary)
    return auc, precision, recall, f1, predictions


# Evaluate the model on a single dataset and save its per-sample predictions.
def evaluate_dataset(model, X, Y, dataset_name, save_dir):
    # eval_metrics[0] is the loss; eval_metrics[1] is the accuracy, assuming
    # the model was compiled with metrics=['accuracy']
    eval_metrics = model.evaluate(X, Y, verbose=0)
    auc, precision, recall, f1, predictions = compute_additional_metrics(X, Y, model)
    metrics = {
        'loss': eval_metrics[0],
        'accuracy': eval_metrics[1],
        'auc': auc,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
    }
    # Save the predictions and labels for each sample
    np.savez_compressed(os.path.join(save_dir, f'{dataset_name}_predictions.npz'),
                        predictions=predictions, labels=Y)
    return metrics


# Evaluate the model on the train, validate, and test splits, print the
# results as a table, and persist them to JSON.
def evaluate_all_datasets(model, train_X, train_Y, validate_X, validate_Y,
                          test_X, test_Y, save_dir):
    metrics = {
        'train': evaluate_dataset(model, train_X, train_Y, 'train', save_dir),
        'validate': evaluate_dataset(model, validate_X, validate_Y, 'validate', save_dir),
        'test': evaluate_dataset(model, test_X, test_Y, 'test', save_dir),
    }

    # Display the metrics in tabular form, one row per split
    metrics_df = pd.DataFrame(metrics).T
    print(metrics_df.to_string())

    # Save metrics to a JSON file
    with open(os.path.join(save_dir, 'evaluation_metrics.json'), 'w') as f:
        json.dump(metrics, f, indent=4)
    print("Evaluation metrics saved to evaluation_metrics.json")

    return metrics


if __name__ == "__main__":
    # Command line arguments
    parser = argparse.ArgumentParser(
        description='Evaluate a trained multiple instance learning classifier on risk data.')
    parser.add_argument('--data_file', type=str, required=True,
                        help='Path to the saved .npz file with training, validation, and test data.')
    parser.add_argument('--model_path', type=str, required=True,
                        help='Path to the saved model file.')
    parser.add_argument('--save_dir', type=str, default='./evaluation_results/',
                        help='Directory to save the evaluation results.')
    args = parser.parse_args()

    os.makedirs(args.save_dir, exist_ok=True)

    # Load the preprocessed data splits
    data = np.load(args.data_file)
    train_X, train_Y = data['train_X'], data['train_Y']
    validate_X, validate_Y = data['validate_X'], data['validate_Y']
    test_X, test_Y = data['test_X'], data['test_Y']

    # Load the saved model
    model = tf.keras.models.load_model(args.model_path)

    # Evaluate the model on all three splits
    metrics = evaluate_all_datasets(model, train_X, train_Y, validate_X, validate_Y,
                                    test_X, test_Y, args.save_dir)
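
# Example invocation (a sketch; the script and file names below are
# hypothetical and should be replaced with your own paths):
#
#   python evaluate_model.py \
#       --data_file ./data/risk_splits.npz \
#       --model_path ./models/mil_classifier.keras \
#       --save_dir ./evaluation_results/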
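
# The --data_file archive must contain the six keys read in the main block.
# A minimal sketch of producing a compatible file (the array names and path
# are assumptions, not part of this script):
#
#   np.savez_compressed('./data/risk_splits.npz',
#                       train_X=train_X, train_Y=train_Y,
#                       validate_X=validate_X, validate_Y=validate_Y,
#                       test_X=test_X, test_Y=test_Y)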
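
# Sketch of reading back a per-split prediction file written by
# evaluate_dataset; the key names 'predictions' and 'labels' match the
# np.savez_compressed call above, and the path is hypothetical:
#
#   saved = np.load('./evaluation_results/test_predictions.npz')
#   test_predictions, test_labels = saved['predictions'], saved['labels']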