|
from math import log, exp |
|
|
|
import numpy as np |
|
from sklearn.metrics import roc_auc_score |
|
|
|
|
|
def cos_sim_to_prob(sim):
    '''
    Rescales a similarity value with the affine map (sim + 1) / 2.

    Parameters:
        sim: the similarity value; -1 maps to 0, 0 maps to 0.5 and 1 maps to 1.

    Returns:
        The rescaled value. No clamping is applied, so inputs outside
        [-1, 1] produce results outside [0, 1].
    '''
    shifted = sim + 1
    return shifted / 2
|
|
|
|
|
def log_prob_to_prob(log_prob):
    '''
    Converts a natural-log probability back to a probability.

    Parameters:
        log_prob: the value handed to math.exp.

    Returns:
        float: exp(log_prob).
    '''
    probability = exp(log_prob)
    return probability
|
|
|
|
|
def prob_to_log_prob(prob):
    '''
    Converts a probability to its natural-log probability.

    Parameters:
        prob: the value handed to math.log.

    Returns:
        float: log(prob), or -inf when prob is exactly 0.

    Raises:
        ValueError: if prob is negative (propagated from math.log).
    '''
    try:
        return log(prob)
    except ValueError:
        # math.log rejects 0, but a probability of exactly 0 is a legitimate
        # input whose log is -inf (and exp(-inf) == 0.0, so the conversion
        # round-trips). Any other domain error is re-raised untouched.
        if prob == 0:
            return float('-inf')
        raise
|
|
|
|
|
def calculate_auroc(all_disease_probs, gt_diseases):
    '''
    Computes AUROC (Area Under the Receiver Operating Characteristic curve)
    for each disease column and as a macro average over all columns.

    Parameters:
        all_disease_probs (numpy array): predicted scores, passed to
            roc_auc_score as y_score; indexed as [:, disease], so it needs
            at least as many columns as gt_diseases.
        gt_diseases (numpy array): ground truth labels, passed to
            roc_auc_score as y_true; its second dimension sets the number
            of diseases (the previous docstring described the shape as
            (N_samples, 14)).

    Returns:
        overall_auroc (float): roc_auc_score over the full matrices with
            average='macro'.
        per_disease_auroc (numpy array): one AUROC per column of
            gt_diseases, shape (gt_diseases.shape[1],).
    '''
    n_diseases = gt_diseases.shape[1]

    # Score each disease column on its own, in column order.
    # NOTE(review): roc_auc_score may raise or warn for a column that holds
    # a single class - confirm against the scikit-learn version in use.
    per_disease_auroc = np.array(
        [
            roc_auc_score(gt_diseases[:, col], all_disease_probs[:, col])
            for col in range(n_diseases)
        ],
        dtype=np.float64,
    )

    overall_auroc = roc_auc_score(gt_diseases, all_disease_probs, average='macro')

    return overall_auroc, per_disease_auroc
|
|