""" This testing script loads actual probabilisitic predictions from a resnet finetuned on CIFAR There are a number of logits-groundtruth pickles available @ https://github.com/markus93/NN_calibration/tree/master/logits [Seems to have moved from Git-LFS to sharepoint] https://tartuulikool-my.sharepoint.com/:f:/g/personal/markus93_ut_ee/EmW0xbhcic5Ou0lRbTrySOUBF2ccSsN7lo6lvSfuG1djew?e=l0TErb See https://github.com/markus93/NN_calibration/blob/master/logits/Readme.txt to decode the [model_dataset] filenames As a bonus, one could consider temperature scaling and measuring after calibration. """ import sys import numpy as np import scipy.stats as stats from scipy.special import softmax import pickle from sklearn.model_selection import train_test_split from matplotlib import pyplot as plt from ece import create_bins, discretize_into_bins, ECE # Open file with pickled variables def unpickle_probs(file, verbose=0, normalize=True): with open(file, "rb") as f: # Python 3: open(..., 'rb') y1, y2 = pickle.load(f) # unpickle the content if isinstance(y1, tuple): y_probs_val, y_val = y1 y_probs_test, y_test = y2 else: y_probs_val, y_probs_test, y_val, y_test = train_test_split( y1, y2.reshape(-1, 1), test_size=len(y2) - 5000, random_state=15 ) # Splits the data in the case of pretrained models if normalize: y_probs_val = softmax(y_probs_val, -1) y_probs_test = softmax(y_probs_test, -1) if verbose: print( "y_probs_val:", y_probs_val.shape ) # (5000, 10); Validation set probabilities of predictions print("y_true_val:", y_val.shape) # (5000, 1); Validation set true labels print("y_probs_test:", y_probs_test.shape) # (10000, 10); Test set probabilities print("y_true_test:", y_test.shape) # (10000, 1); Test set true labels return ((y_probs_val, y_val.ravel()), (y_probs_test, y_test.ravel())) def unpickle_structured_probs(valpath=None, testpath=None): valpath = "/home/jordy/code/gordon/arkham/arkham/StructuredCalibration/models/jordyvl/bert-base-cased_conll2003-sm-first-ner_validation_UTY.pickle" testpath = "/home/jordy/code/gordon/arkham/arkham/StructuredCalibration/models/jordyvl/bert-base-cased_conll2003-sm-first-ner_test_UTY.pickle" with open(valpath, "rb") as f: X_val, _, y_val, _ = pickle.load(f) with open(testpath, "rb") as f: X_test, _, y_test, _ = pickle.load(f) X_val = np.log(X_val) # originally exponentiated [different purposes] X_test = np.log(X_test) # originally exponentiated [different purposes] # structured logits """ ALTERNATE equal mass binning """ # Define data types. from typing import List, Tuple, NewType, TypeVar Data = List[Tuple[float, float]] # List of (predicted_probability, true_label). Bins = List[float] # List of bin boundaries, excluding 0.0, but including 1.0. BinnedData = List[Data] # binned_data[i] contains the data in bin i. 
T = TypeVar("T")

eps = 1e-6


def split(sequence: List[T], parts: int) -> List[List[T]]:
    assert parts <= len(sequence), "more bins than probabilities"
    part_size = int(np.ceil(len(sequence) * 1.0 / parts))
    assert part_size * parts >= len(sequence), "no missing instances when partitioning"
    assert (part_size - 1) * parts < len(sequence), "dropping 1 does not make for missing"
    return [sequence[i:i + part_size] for i in range(0, len(sequence), part_size)]


def get_equal_bins(probs: List[float], n_bins: int = 10) -> Bins:
    """Get bins that contain approximately an equal number of data points."""
    sorted_probs = sorted(probs)
    binned_data = split(sorted_probs, n_bins)
    bins: Bins = []
    for i in range(len(binned_data) - 1):
        last_prob = binned_data[i][-1]
        next_first_prob = binned_data[i + 1][0]
        bins.append((last_prob + next_first_prob) / 2.0)
    bins.append(1.0)
    bins = sorted(list(set(bins)))  # deduplicate boundaries; this is the special thing!
    return bins


def histedges_equalN(x, nbin):
    npt = len(x)
    return np.interp(np.linspace(0, npt, nbin + 1), np.arange(npt), np.sort(x))


'''
bin_upper_edges = histedges_equalN(P, n_bins)
# n, bins, patches = plt.hist(x, histedges_equalN(x, 10))
'''


def test_equalmass_binning(P, Y):
    # probs = np.array([0.63, 0.2, 0.2, 0, 0.95, 0.05, 0.72, 0.1, 0.2])
    kwargs = dict(
        n_bins=10,
        scheme="equal-mass",
        bin_range=None,
        proxy="upper-edge",
        # proxy="center",
        p=1,
        detail=True,
    )
    if P.ndim == 2:  # can assume classification; use top-1 confidence for ECE
        p_max = np.max(P, -1)  # create p̂ as top-1 softmax probability ∈ [0, 1]
        eqr_bins = create_bins(
            n_bins=kwargs["n_bins"], scheme="equal-range", bin_range=kwargs["bin_range"], P=p_max
        )
        eqm_bins = create_bins(
            n_bins=kwargs["n_bins"], scheme=kwargs["scheme"], bin_range=kwargs["bin_range"], P=p_max
        )
        # alternate_eqm_bins = get_equal_bins(p_max, kwargs["n_bins"])

        eqr_hist = np.digitize(p_max, eqr_bins, right=True)
        eqm_hist = np.digitize(p_max, eqm_bins, right=True)
        eqml_hist = np.digitize(p_max, eqm_bins, right=False)
        # eqm_bins = [0] + eqm_bins
        other_hist = discretize_into_bins(np.expand_dims(p_max, 0), eqm_bins)

        # Pearson chi-square between the two binnings; compare per-bin counts,
        # not the raw per-sample bin assignments
        n_counts = max(eqr_hist.max(), eqm_hist.max()) + 1
        hist_difference = stats.power_divergence(
            np.bincount(eqr_hist, minlength=n_counts),
            np.bincount(eqm_hist, minlength=n_counts),
            lambda_="pearson",
        )

        # plt.hist(eqr_hist, color="green", label="equal-range")
        plt.hist(eqm_hist, color="blue", label="equal-mass")
        plt.legend()
        # plt.show()

    res = ECE()._compute(P, Y, **kwargs)
    print(f"eqm ECE: {res['ECE']}")

    kwargs["scheme"] = "equal-range"
    res = ECE()._compute(P, Y, **kwargs)
    print(f"eqr ECE: {res['ECE']}")

    # res = ECE()._compute(predictions, references, detail=True)
    # print(f"ECE: {res['ECE']}")


if __name__ == "__main__":
    FILE_PATH = sys.argv[1] if len(sys.argv) > 1 else "resnet110_c10_logits.p"
    (p_val, y_val), (p_test, y_test) = unpickle_probs(FILE_PATH, False, True)
    test_equalmass_binning(p_val, y_val)  # do on val
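    # Illustrative extra (not in the original flow): a tiny worked example of
    # get_equal_bins, showing how equal-mass boundaries adapt to the scores.
    toy_scores = [0.1, 0.2, 0.3, 0.8, 0.9, 0.95]
    print("equal-mass bins for toy scores:", get_equal_bins(toy_scores, n_bins=3))
    # -> [0.25, 0.85, 1.0]; each bin holds ~2 points, whereas fixed
    #    equal-range edges would sit at [0.333, 0.667, 1.0].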