""" This testing script loads actual probabilisitic predictions from a resnet finetuned on CIFAR There are a number of logits-groundtruth pickles available @ https://github.com/markus93/NN_calibration/tree/master/logits [Seems to have moved from Git-LFS to sharepoint] https://tartuulikool-my.sharepoint.com/:f:/g/personal/markus93_ut_ee/EmW0xbhcic5Ou0lRbTrySOUBF2ccSsN7lo6lvSfuG1djew?e=l0TErb See https://github.com/markus93/NN_calibration/blob/master/logits/Readme.txt to decode the [model_dataset] filenames As a bonus, one could consider temperature scaling and measuring after calibration. """ import sys import numpy as np import scipy.stats as stats from scipy.special import softmax import pickle from sklearn.model_selection import train_test_split from matplotlib import pyplot as plt from ece import create_bins, discretize_into_bins, ECE # Open file with pickled variables def unpickle_probs(file, verbose=0, normalize=True): with open(file, "rb") as f: # Python 3: open(..., 'rb') y1, y2 = pickle.load(f) # unpickle the content if isinstance(y1, tuple): y_probs_val, y_val = y1 y_probs_test, y_test = y2 else: y_probs_val, y_probs_test, y_val, y_test = train_test_split( y1, y2.reshape(-1, 1), test_size=len(y2) - 5000, random_state=15 ) # Splits the data in the case of pretrained models if normalize: y_probs_val = softmax(y_probs_val, -1) y_probs_test = softmax(y_probs_test, -1) if verbose: print( "y_probs_val:", y_probs_val.shape ) # (5000, 10); Validation set probabilities of predictions print("y_true_val:", y_val.shape) # (5000, 1); Validation set true labels print("y_probs_test:", y_probs_test.shape) # (10000, 10); Test set probabilities print("y_true_test:", y_test.shape) # (10000, 1); Test set true labels return ((y_probs_val, y_val.ravel()), (y_probs_test, y_test.ravel())) def unpickle_structured_probs(valpath=None, testpath=None): valpath = "/home/jordy/code/gordon/arkham/arkham/StructuredCalibration/models/jordyvl/bert-base-cased_conll2003-sm-first-ner_validation_UTY.pickle" testpath = "/home/jordy/code/gordon/arkham/arkham/StructuredCalibration/models/jordyvl/bert-base-cased_conll2003-sm-first-ner_test_UTY.pickle" with open(valpath, "rb") as f: X_val, _, y_val, _ = pickle.load(f) with open(testpath, "rb") as f: X_test, _, y_test, _ = pickle.load(f) X_val = np.log(X_val) # originally exponentiated [different purposes] X_test = np.log(X_test) # originally exponentiated [different purposes] # structured logits """ ALTERNATE equal mass binning """ # Define data types. from typing import List, Tuple, NewType, TypeVar Data = List[Tuple[float, float]] # List of (predicted_probability, true_label). Bins = List[float] # List of bin boundaries, excluding 0.0, but including 1.0. BinnedData = List[Data] # binned_data[i] contains the data in bin i. 
T = TypeVar("T")

eps = 1e-6


def split(sequence: List[T], parts: int) -> List[List[T]]:
    assert parts <= len(sequence), "more bins than probabilities"
    part_size = int(np.ceil(len(sequence) * 1.0 / parts))
    assert part_size * parts >= len(sequence), "no missing instances when partitioning"
    assert (part_size - 1) * parts < len(sequence), "dropping 1 does not make for missing"
    return [sequence[i:i + part_size] for i in range(0, len(sequence), part_size)]


def get_equal_bins(probs: List[float], n_bins: int = 10) -> Bins:
    """Get bins that contain approximately an equal number of data points."""
    sorted_probs = sorted(probs)
    binned_data = split(sorted_probs, n_bins)
    bins: Bins = []
    for i in range(len(binned_data) - 1):
        last_prob = binned_data[i][-1]
        next_first_prob = binned_data[i + 1][0]
        bins.append((last_prob + next_first_prob) / 2.0)
    bins.append(1.0)
    bins = sorted(list(set(bins)))  # deduplicate boundaries; this is the special thing!
    return bins


def histedges_equalN(x, nbin):
    npt = len(x)
    return np.interp(np.linspace(0, npt, nbin + 1), np.arange(npt), np.sort(x))


'''
bin_upper_edges = histedges_equalN(P, n_bins)
# n, bins, patches = plt.hist(x, histedges_equalN(x, 10))
'''


def test_equalmass_binning(P, Y):
    # probs = np.array([0.63, 0.2, 0.2, 0, 0.95, 0.05, 0.72, 0.1, 0.2])
    kwargs = dict(
        n_bins=10,
        scheme="equal-mass",
        bin_range=None,
        proxy="upper-edge",
        # proxy="center",
        p=1,
        detail=True,
    )
    if P.ndim == 2:  # can assume classification; use top-1 confidence for ECE
        p_max = np.max(P, -1)  # create p̂ as top-1 softmax probability ∈ [0, 1]
        eqr_bins = create_bins(
            n_bins=kwargs["n_bins"], scheme="equal-range", bin_range=kwargs["bin_range"], P=p_max
        )
        eqm_bins = create_bins(
            n_bins=kwargs["n_bins"], scheme=kwargs["scheme"], bin_range=kwargs["bin_range"], P=p_max
        )
        # alternate_eqm_bins = get_equal_bins(p_max, kwargs["n_bins"])

        eqr_hist = np.digitize(p_max, eqr_bins, right=True)
        eqm_hist = np.digitize(p_max, eqm_bins, right=True)
        eqml_hist = np.digitize(p_max, eqm_bins, right=False)
        # eqm_bins = [0] + eqm_bins
        other_hist = discretize_into_bins(np.expand_dims(p_max, 0), eqm_bins)

        # Pearson chi-square between the two binnings; compare per-bin counts,
        # not the raw per-sample bin assignments
        n_counts = max(eqr_hist.max(), eqm_hist.max()) + 1
        hist_difference = stats.power_divergence(
            np.bincount(eqr_hist, minlength=n_counts),
            np.bincount(eqm_hist, minlength=n_counts),
            lambda_="pearson",
        )

        # plt.hist(eqr_hist, color="green", label="equal-range")
        plt.hist(eqm_hist, color="blue", label="equal-mass")
        plt.legend()
        # plt.show()

    res = ECE()._compute(P, Y, **kwargs)
    print(f"eqm ECE: {res['ECE']}")

    kwargs["scheme"] = "equal-range"
    res = ECE()._compute(P, Y, **kwargs)
    print(f"eqr ECE: {res['ECE']}")

    # res = ECE()._compute(predictions, references, detail=True)
    # print(f"ECE: {res['ECE']}")


if __name__ == "__main__":
    FILE_PATH = sys.argv[1] if len(sys.argv) > 1 else "resnet110_c10_logits.p"
    (p_val, y_val), (p_test, y_test) = unpickle_probs(FILE_PATH, False, True)
    test_equalmass_binning(p_val, y_val)  # do on val
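    # Illustrative extra (not in the original flow): a tiny worked example of
    # get_equal_bins, showing how equal-mass boundaries adapt to the scores.
    toy_scores = [0.1, 0.2, 0.3, 0.8, 0.9, 0.95]
    print("equal-mass bins for toy scores:", get_equal_bins(toy_scores, n_bins=3))
    # -> [0.25, 0.85, 1.0]; each bin holds ~2 points, whereas fixed
    #    equal-range edges would sit at [0.333, 0.667, 1.0].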