Spaces:

CZLC
/

mc_auroc

Runtime error

App Files Files Community

mfajcik committed on Feb 28

Commit

439a704

•

1 Parent(s): 0c400ac

Upload mc_auroc.py

Browse files

Files changed (1) hide show

mc_auroc.py +85 -4

mc_auroc.py CHANGED Viewed

@@ -36,7 +36,7 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
             features=[
                 datasets.Features(
                     {
-                        "predictions":datasets.Sequence(datasets.Value("float")),
                         "references": datasets.Value("int8")
                     }
                 ),
@@ -46,6 +46,45 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
             ],
         )
     def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
         """
         Computes the average AUROC score for multi-class classification problems.
@@ -55,16 +94,58 @@ class AVG_MULTICLASS_AUROC(evaluate.Metric):
         n_classes = list(range(len(probabilities[0])))
         fpr = dict()
         tpr = dict()
         roc_auc = dict()
         for i in range(len(n_classes)):
-            fpr[i], tpr[i], _ = roc_curve(y_true=[1 if x == n_classes[i] else 0 for x in references],
-                                          y_score=[prob[i] for prob in probabilities])
             roc_auc[i] = auc(fpr[i], tpr[i])
         # Compute average AUC
         average_auc = numpy.mean(list(roc_auc.values()))
         return {
             "mc_auroc_score": average_auc,
-            "mc_auroc_ci": "Not implemented yet."
         }

             features=[
                 datasets.Features(
                     {
+                        "predictions": datasets.Sequence(datasets.Value("float")),
                         "references": datasets.Value("int8")
                     }
                 ),
             ],
         )
+    def _evaluate_statistics(self, variates, coverage):
+        """Evaluates the left and right margins for a given M-C distribution
+        Parameters
+        ----------
+        variates : numpy.ndarray
+            A 1-D array containing the simulated variates
+        coverage : float
+            A number, between 0 and 1 to indicate the desired coverage.  Typically,
+            this number is set to 0.95 (95% coverage).
+        Returns
+        -------
+        stats : (float, float, float, float)
+            mean, mode and credible intervals for the input simulation
+        """
+        left_half = (1 - coverage) / 2  # size of excluded (half) area
+        sorted_variates = numpy.sort(variates)
+        # n.b.: we return the equally tailed range
+        # calculates position of score which would exclude the left_half (left)
+        lower_index = int(round(len(variates) * left_half))
+        # calculates position of score which would exclude the right_half (right)
+        upper_index = int(round(len(variates) * (1 - left_half)))
+        lower = sorted_variates[lower_index - 1]
+        upper = sorted_variates[upper_index - 1]
+        return lower, upper
     def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
         """
         Computes the average AUROC score for multi-class classification problems.
         n_classes = list(range(len(probabilities[0])))
         fpr = dict()
         tpr = dict()
+        thresholds = dict()
         roc_auc = dict()
+        roc_auc_ci_low = dict()
+        roc_auc_ci_high = dict()
         for i in range(len(n_classes)):
+            fpr[i], tpr[i], thresholds[i] = roc_curve(y_true=[1 if x == n_classes[i] else 0 for x in references],
+                                                      y_score=[prob[i] for prob in probabilities])
+            confusion_matrices = self._get_CMs(i, probabilities, references, thresholds)
+            low_ci_tpr, high_ci_tpr = [0] * len(thresholds[i]), [0] * len(thresholds[i])
+            λ = 1.0
+            for k in range(len(thresholds[i])):
+                variates = numpy.random.beta(confusion_matrices[k]["TP"] + λ, confusion_matrices[k]["FN"] + λ, 1000000)
+                low_ci_tpr[k], high_ci_tpr[k] = self._evaluate_statistics(variates, 0.95)
+            roc_auc_ci_low[i] = auc(fpr[i], low_ci_tpr)
+            roc_auc_ci_high[i] = auc(fpr[i], high_ci_tpr)
             roc_auc[i] = auc(fpr[i], tpr[i])
         # Compute average AUC
         average_auc = numpy.mean(list(roc_auc.values()))
+        average_auc_ci_low = numpy.mean(list(roc_auc_ci_low.values()))
+        average_auc_ci_high = numpy.mean(list(roc_auc_ci_high.values()))
         return {
             "mc_auroc_score": average_auc,
+            "mc_auroc_ci": (average_auc_ci_low, average_auc_ci_high)
         }
+    def _get_CMs(self, i, probabilities, references, thresholds):
+        confusion_matrices = []
+        for threshold in thresholds[i]:
+            TP = 0
+            FP = 0
+            TN = 0
+            FN = 0
+            for j in range(len(probabilities)):
+                if probabilities[j][i] >= threshold:
+                    if references[j] == i:
+                        TP += 1
+                    else:
+                        FP += 1
+                else:
+                    if references[j] == i:
+                        FN += 1
+                    else:
+                        TN += 1
+            cm = {"TP": TP, "FP": FP, "TN": TN, "FN": FN, "threshold": threshold, "class": i}
+            confusion_matrices.append(cm)
+        return confusion_matrices