# Xplainer / utils.py
# ChantalPellegrini's picture
# first commit
# 06257c8
from math import log, exp
import numpy as np
from sklearn.metrics import roc_auc_score
def cos_sim_to_prob(sim):
    '''
    Maps a cosine similarity to a probability-like score.

    Applies the linear transformation (sim + 1) / 2, so -1 maps to 0,
    0 maps to 0.5 and 1 maps to 1.

    Parameters:
        sim: cosine similarity value; anything supporting `+ 1` and `/ 2`

    Returns:
        the rescaled value (sim + 1) / 2
    '''
    shifted = sim + 1  # move the range [-1, 1] to [0, 2]
    return shifted / 2  # then scale it to [0, 1]
def log_prob_to_prob(log_prob):
    '''
    Converts a natural-log probability back to a probability.

    Parameters:
        log_prob: natural logarithm of a probability; must be accepted by math.exp

    Returns:
        prob (float): exp(log_prob)
    '''
    prob = exp(log_prob)
    return prob
def prob_to_log_prob(prob):
    '''
    Converts a probability to its natural logarithm.

    Parameters:
        prob: probability value; must be accepted by math.log, which
            requires a strictly positive number (math.log raises
            ValueError for 0 or negative input)

    Returns:
        log_prob (float): log(prob)
    '''
    log_prob = log(prob)
    return log_prob
def calculate_auroc(all_disease_probs, gt_diseases):
    '''
    Calculates the AUROC (Area Under the Receiver Operating Characteristic curve) for multiple diseases.

    Parameters:
        all_disease_probs (numpy array): predicted disease scores, one column per disease,
            shape (N_samples, N_diseases); passed to roc_auc_score as the score argument
        gt_diseases (numpy array): ground truth disease labels, a multi-hot array of
            shape (N_samples, N_diseases)

    Returns:
        overall_auroc (float): the macro-averaged AUROC score over all diseases
        per_disease_auroc (numpy array): an array of shape (N_diseases,) containing the
            AUROC score for each disease

    NOTE(review): AUROC is undefined for a disease column whose ground truth contains
    only one class; how roc_auc_score reports that depends on the scikit-learn version.
    '''
    # The number of diseases is taken from the ground-truth columns.
    num_diseases = gt_diseases.shape[1]
    per_disease_auroc = np.zeros((num_diseases,))

    # Score each disease column on its own.
    for disease_idx in range(num_diseases):
        per_disease_auroc[disease_idx] = roc_auc_score(
            gt_diseases[:, disease_idx], all_disease_probs[:, disease_idx]
        )

    # Macro average over all disease columns.
    overall_auroc = roc_auc_score(gt_diseases, all_disease_probs, average='macro')
    return overall_auroc, per_disease_auroc