File size: 1,288 Bytes
06257c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from math import log, exp

import numpy as np
from sklearn.metrics import roc_auc_score


def cos_sim_to_prob(sim):
    '''
    Rescales a cosine similarity linearly so that -1 maps to 0 and 1 maps to 1.

    Parameters:
    sim: cosine similarity value; anything supporting `+ 1` and `/ 2` works

    Returns:
    (sim + 1) / 2, which lies in [0, 1] whenever sim lies in [-1, 1]
    '''
    shifted = sim + 1  # move the range [-1, 1] up to [0, 2]
    return shifted / 2  # then halve it to land in [0, 1]


def log_prob_to_prob(log_prob):
    '''
    Converts a natural-log probability back into a plain probability.

    Parameters:
    log_prob: natural logarithm of a probability (a scalar accepted by math.exp)

    Returns:
    prob (float): e raised to the power log_prob
    '''
    prob = exp(log_prob)
    return prob


def prob_to_log_prob(prob):
    '''
    Converts a probability into its natural logarithm.

    Parameters:
    prob: probability value (a scalar accepted by math.log); must be > 0,
          because math.log raises ValueError for zero or negative input

    Returns:
    log_prob (float): natural logarithm of prob
    '''
    log_prob = log(prob)
    return log_prob


def calculate_auroc(all_disease_probs, gt_diseases):
    '''
    Scores multi-label disease predictions with AUROC (Area Under the Receiver
    Operating Characteristic curve), both per disease and macro-averaged.

    Parameters:
    all_disease_probs (numpy array): predicted score for each disease (presumably
        probabilities), shape (N_samples, N_diseases), e.g. (N_samples, 14)
    gt_diseases (numpy array): ground truth multi-hot disease labels with the same
        shape; its second dimension determines the number of diseases

    Returns:
    overall_auroc (float): macro-averaged AUROC over all diseases
    per_disease_auroc (numpy array): shape (N_diseases,), one AUROC per disease,
        in column order
    '''
    num_diseases = gt_diseases.shape[1]

    # Score every label column as an independent binary problem.
    column_scores = [
        roc_auc_score(gt_diseases[:, col], all_disease_probs[:, col])
        for col in range(num_diseases)
    ]
    per_disease_auroc = np.array(column_scores, dtype=float)

    # Let scikit-learn produce the macro average across all columns.
    overall_auroc = roc_auc_score(gt_diseases, all_disease_probs, average='macro')

    return overall_auroc, per_disease_auroc