Spaces:

jordyvl
/

ece

Runtime error

App Files Community

jordyvl committed on Jun 30, 2022

Commit

897eeff

•

1 Parent(s): 6a39113

Fix to reliability diagram - correct with test

Browse files

Files changed (2) hide show

app.py +26 -30
ece.py +23 -8

app.py CHANGED Viewed

@@ -48,34 +48,31 @@ def reliability_plot(results):
     # DEV: nicer would be to plot like a polygon
     # see: https://github.com/markus93/fit-on-the-test/blob/main/Experiments_Synthetic/binnings.py
-    def over_under_confidence(results):
-        colors = []
-        for j, bin in enumerate(results["y_bar"]):
-            perfect = results["y_bar"][j]
-            empirical = results["p_bar"][j]
-            bin_color = (
-                "limegreen"
-                if np.allclose(perfect, empirical)
-                else "dodgerblue"
-                if empirical < perfect
-                else "orangered"
-            )
-            colors.append(bin_color)
-        return colors
     fig, ax1, ax2 = default_plot()
     # Bin differences
     bins_with_left_edge = np.insert(results["y_bar"], 0, 0, axis=0)
-    B, bins, patches = ax1.hist(
-        results["y_bar"],
-        weights=np.nan_to_num(results["p_bar"][:-1], copy=True, nan=0),
-        bins=bins_with_left_edge,
     )
-    colors = over_under_confidence(results)
-    for b in range(len(B)):
-        patches[b].set_facecolor(colors[b])  # color based on over/underconfidence
     ax1handles = [
         mpatches.Patch(color="orangered", label="Overconfident"),
@@ -84,12 +81,11 @@ def reliability_plot(results):
     ]
     # Bin frequencies
-    anindices = np.where(~np.isnan(results["p_bar"][:-1]))[0]
-    n_bins = len(results["y_bar"])
-    bin_freqs = np.zeros(n_bins)
     bin_freqs[anindices] = results["bin_freq"]
-    B, newbins, patches = ax2.hist(
-        results["y_bar"], weights=bin_freqs, color="midnightblue", bins=bins_with_left_edge
     )
     acc_plt = ax2.axvline(x=results["accuracy"], ls="solid", lw=3, c="black", label="Accuracy")
@@ -148,8 +144,8 @@ component = gr.inputs.Dataframe(
 )
 component.value = [
-    [[0.63, 0.2, 0.2], 0],
-    [[0.73, 0.1, 0.2], 2],
     [[0, 0.95, 0.05], 1],
 ]
 sample_data = [[component] + slider_defaults]

     # DEV: nicer would be to plot like a polygon
     # see: https://github.com/markus93/fit-on-the-test/blob/main/Experiments_Synthetic/binnings.py
     fig, ax1, ax2 = default_plot()
     # Bin differences
     bins_with_left_edge = np.insert(results["y_bar"], 0, 0, axis=0)
+    bins_with_right_edge = np.insert(results["y_bar"], -1, 1.0, axis=0)
+    bins_with_leftright_edge = np.insert(bins_with_left_edge, -1, 1.0, axis=0)
+    weights = np.nan_to_num(results["p_bar"], copy=True, nan=0)
+    # NOTE: the histogram API is strange
+    _, _, patches = ax1.hist(
+        bins_with_left_edge,
+        weights=weights,
+        bins=bins_with_leftright_edge,
     )
+    for b in range(len(patches)):
+        perfect = bins_with_right_edge[b]  # if b != n_bins else
+        empirical = weights[b]  # patches[b]._height
+        bin_color = (
+            "limegreen"
+            if perfect == empirical
+            else "dodgerblue"
+            if empirical < perfect
+            else "orangered"
+        )
+        patches[b].set_facecolor(bin_color)  # color based on over/underconfidence
     ax1handles = [
         mpatches.Patch(color="orangered", label="Overconfident"),
     ]
     # Bin frequencies
+    anindices = np.where(~np.isnan(results["p_bar"]))[0]
+    bin_freqs = np.zeros(len(results["p_bar"]))
     bin_freqs[anindices] = results["bin_freq"]
+    ax2.hist(
+        bins_with_left_edge, weights=bin_freqs, color="midnightblue", bins=bins_with_leftright_edge
     )
     acc_plt = ax2.axvline(x=results["accuracy"], ls="solid", lw=3, c="black", label="Accuracy")
 )
 component.value = [
+    [[0.6, 0.2, 0.2], 0],
+    [[0.7, 0.1, 0.2], 2],
     [[0, 0.95, 0.05], 1],
 ]
 sample_data = [[component] + slider_defaults]

ece.py CHANGED Viewed

@@ -21,7 +21,6 @@ import numpy as np
 from typing import Dict, Optional
 # TODO: Add BibTeX citation
 _CITATION = """\
 @InProceedings{huggingface:module,
@@ -103,9 +102,9 @@ def create_bins(n_bins=10, scheme="equal-range", bin_range=None, P=None):
         # rightmost entry per equal size group
         for cur_group in range(n_bins - 1):
             bin_upper_edges += [max(groups[cur_group])]
-        bin_upper_edges += [1.01] #[np.inf]  # always +1 for right edges
         bins = np.array(bin_upper_edges)
-        #OverflowError: cannot convert float infinity to integer
     return bins
@@ -200,7 +199,14 @@ def top_1_CE(Y, P, **kwargs):
     )
     CE = CE_estimate(y_correct, p_max, bins=bins, proxy=kwargs["proxy"], detail=kwargs["detail"])
     if kwargs["detail"]:
-        return {"ECE": CE[0], "y_bar": CE[1], "p_bar": CE[2], "bin_freq": CE[3], "p_bar_cont": np.mean(p_max,-1), "accuracy": np.mean(y_correct)}
     return CE
@@ -306,9 +312,18 @@ def test_ECE():
     print(f"ECE: {res['ECE']}")
     res = ECE()._compute(predictions, references, detail=True)
-    import pdb; pdb.set_trace()  # breakpoint 25274412 //
     print(f"ECE: {res['ECE']}")
-if __name__ == '__main__':
-    test_ECE()

 from typing import Dict, Optional
 # TODO: Add BibTeX citation
 _CITATION = """\
 @InProceedings{huggingface:module,
         # rightmost entry per equal size group
         for cur_group in range(n_bins - 1):
             bin_upper_edges += [max(groups[cur_group])]
+        bin_upper_edges += [1.01]  # [np.inf]  # always +1 for right edges
         bins = np.array(bin_upper_edges)
+        # OverflowError: cannot convert float infinity to integer
     return bins
     )
     CE = CE_estimate(y_correct, p_max, bins=bins, proxy=kwargs["proxy"], detail=kwargs["detail"])
     if kwargs["detail"]:
+        return {
+            "ECE": CE[0],
+            "y_bar": CE[1],
+            "p_bar": CE[2],
+            "bin_freq": CE[3],
+            "p_bar_cont": np.mean(p_max, -1),
+            "accuracy": np.mean(y_correct),
+        }
     return CE
     print(f"ECE: {res['ECE']}")
     res = ECE()._compute(predictions, references, detail=True)
     print(f"ECE: {res['ECE']}")
+def test_deterministic():
+    res = ECE()._compute(
+        references=[0, 1, 2],
+        predictions=[[0.63, 0.2, 0.2], [0, 0.95, 0.05], [0.72, 0.1, 0.2]],
+        detail=True,
+    )
+    print(f"ECE: {res['ECE']}\n {res}")
+if __name__ == "__main__":
+    test_deterministic()
+    test_ECE()