# Fixtures for a metric whose output is keyed by "ECE" (presumably Expected
# Calibration Error -- confirm against the metric implementation).
#
# Each case is a dict with three keys:
#   "predictions": one row of per-class scores per sample
#   "references":  one label per sample
#   "result":      the expected metric output, {"ECE": <number>}
#
# The expected values below are deliberately spelled out as arithmetic so the
# derivation stays visible. Terms of the form (label == predicted_class) are
# Python booleans used as 0/1. The constants 0.7, 0.8 and 1 are presumably the
# confidence value the metric assigns to the bin each top score falls into --
# TODO confirm against the metric's binning scheme.
test_cases = [
    # Every top-scoring class equals its reference label, with score 1.
    {
        "predictions": [[0, 1], [1, 0]],
        "references": [1, 0],
        "result": {"ECE": 0},
    },
    # Same predictions, references swapped: every top-scoring class is wrong.
    {
        "predictions": [[0, 1], [1, 0]],
        "references": [0, 1],
        "result": {"ECE": 1},
    },
    # All predictions in separate bins.
    {
        "predictions": [[0.6, 0.2, 0.2], [0, 0.95, 0.05], [0.75, 0.05, 0.2]],
        "references": [0, 1, 2],
        "result": {
            "ECE": (
                (abs((0 == 0) - 0.7) + abs((1 == 1) - 1) + abs((2 == 0) - 0.8)) / 3
            )
        },
    },
    # Some predictions in the same bin: the first and third samples are
    # combined inside a single abs() term, the second sample stands alone.
    {
        "predictions": [[0.6, 0.2, 0.2], [0, 0.95, 0.05], [0.7, 0.1, 0.2]],
        "references": [0, 1, 2],
        "result": {
            "ECE": abs((0 == 0) - 0.7 + (2 == 0) - 0.7) / 3 + abs((1 == 1) - 1) / 3
        },
    },
    # DEV: make more advanced tests including differing kwargs
]