Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-1_Queries-0_Prompt-1_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-1_Queries-0_Prompt-1_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
b71280f
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,38,13,0.13194444444444445,0.7450980392156863,0.22418879056047197,0.5434027777777778
3,3,87,87,14,11,0.16091954022988506,0.56,0.25,0.5172413793103449
4,4,94,94,6,1,0.06382978723404255,0.8571428571428571,0.1188118811881188,0.526595744680851
5,5,62,62,0,1,0.0,0.0,0.0,0.49193548387096775
6,6,63,63,6,0,0.09523809523809523,1.0,0.17391304347826084,0.5476190476190477
7,7,17,17,5,4,0.29411764705882354,0.5555555555555556,0.3846153846153846,0.5294117647058824
8,8,65,65,13,14,0.2,0.48148148148148145,0.28260869565217395,0.49230769230769234
9,9,31,31,16,9,0.5161290322580645,0.64,0.5714285714285714,0.6129032258064516
10,10,57,57,9,5,0.15789473684210525,0.6428571428571429,0.25352112676056343,0.5350877192982456
11,11,48,48,6,3,0.125,0.6666666666666666,0.21052631578947367,0.53125
12,12,36,36,9,4,0.25,0.6923076923076923,0.3673469387755102,0.5694444444444444
13,13,17,17,7,5,0.4117647058823529,0.5833333333333334,0.4827586206896552,0.5588235294117647
14,14,77,77,9,5,0.11688311688311688,0.6428571428571429,0.1978021978021978,0.525974025974026
15,15,40,40,0,1,0.0,0.0,0.0,0.4875
16,16,29,29,16,18,0.5517241379310345,0.47058823529411764,0.507936507936508,0.46551724137931033
17,total,1043,1043,154,94,,,,
18,,,,,Micro avg.,0.1476510067114094,0.6209677419354839,0.23857474825716501,0.5287631831255992
19,,,,,Macro avg.,0.18090854376482146,0.5022287145124515,0.23679165145158182,0.5255890633286358
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,0,0.03333333333333333,1.0,0.06451612903225806,0.5166666666666667
2,2,288,288,99,88,0.34375,0.5294117647058824,0.41684210526315785,0.5190972222222222
3,3,87,87,71,60,0.8160919540229885,0.5419847328244275,0.6513761467889909,0.5632183908045977
4,4,94,94,33,23,0.35106382978723405,0.5892857142857143,0.44,0.5531914893617021
5,5,62,62,11,12,0.1774193548387097,0.4782608695652174,0.25882352941176473,0.49193548387096775
6,6,63,63,47,18,0.746031746031746,0.7230769230769231,0.7343749999999999,0.7301587301587301
7,7,17,17,10,10,0.5882352941176471,0.5,0.5405405405405405,0.5
8,8,65,65,43,42,0.6615384615384615,0.5058823529411764,0.5733333333333334,0.5076923076923077
9,9,31,31,26,24,0.8387096774193549,0.52,0.6419753086419753,0.532258064516129
10,10,57,57,46,28,0.8070175438596491,0.6216216216216216,0.7022900763358778,0.6578947368421053
11,11,48,48,41,37,0.8541666666666666,0.5256410256410257,0.6507936507936508,0.5416666666666666
12,12,36,36,31,30,0.8611111111111112,0.5081967213114754,0.6391752577319588,0.5138888888888888
13,13,17,17,12,12,0.7058823529411765,0.5,0.5853658536585366,0.5
14,14,77,77,57,45,0.7402597402597403,0.5588235294117647,0.6368715083798882,0.577922077922078
15,15,40,40,4,10,0.1,0.2857142857142857,0.14814814814814817,0.425
16,16,29,29,27,26,0.9310344827586207,0.5094339622641509,0.6585365853658537,0.5172413793103449
17,total,1043,1043,559,465,,,,
18,,,,,Micro avg.,0.535953978906999,0.5458984375,0.5408805031446541,0.5450623202301055
19,,,,,Macro avg.,0.5620967969815553,0.523372559021392,0.49076253961329025,0.5381077708778477
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,60,36,0.20833333333333334,0.625,0.3125,0.5416666666666666
3,3,87,87,46,36,0.5287356321839081,0.5609756097560976,0.5443786982248521,0.5574712643678161
4,4,94,94,17,3,0.18085106382978725,0.85,0.29824561403508776,0.574468085106383
5,5,62,62,7,5,0.11290322580645161,0.5833333333333334,0.18918918918918917,0.5161290322580645
6,6,63,63,37,7,0.5873015873015873,0.8409090909090909,0.6915887850467289,0.7380952380952381
7,7,17,17,8,7,0.47058823529411764,0.5333333333333333,0.5,0.5294117647058824
8,8,65,65,31,31,0.47692307692307695,0.5,0.4881889763779527,0.5
9,9,31,31,16,11,0.5161290322580645,0.5925925925925926,0.5517241379310345,0.5806451612903226
10,10,57,57,29,16,0.5087719298245614,0.6444444444444445,0.5686274509803921,0.6140350877192983
11,11,48,48,27,20,0.5625,0.574468085106383,0.5684210526315789,0.5729166666666666
12,12,36,36,20,13,0.5555555555555556,0.6060606060606061,0.5797101449275361,0.5972222222222222
13,13,17,17,8,6,0.47058823529411764,0.5714285714285714,0.5161290322580646,0.5588235294117647
14,14,77,77,18,12,0.23376623376623376,0.6,0.3364485981308411,0.538961038961039
15,15,40,40,1,5,0.025,0.16666666666666666,0.04347826086956522,0.45
16,16,29,29,23,22,0.7931034482758621,0.5111111111111111,0.6216216216216216,0.5172413793103449
17,total,1043,1043,348,230,,,,
18,,,,,Micro avg.,0.3336529242569511,0.6020761245674741,0.4293645897594077,0.5565675934803451
19,,,,,Macro avg.,0.366532387626274,0.5153131438083665,0.4006030330720262,0.5521815962812769
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,31,13,0.1076388888888889,0.7045454545454546,0.18674698795180725,0.53125
3,3,87,87,8,8,0.09195402298850575,0.5,0.1553398058252427,0.5
4,4,94,94,4,0,0.0425531914893617,1.0,0.08163265306122448,0.5212765957446809
5,5,62,62,3,2,0.04838709677419355,0.6,0.08955223880597016,0.5080645161290323
6,6,63,63,18,4,0.2857142857142857,0.8181818181818182,0.4235294117647059,0.6111111111111112
7,7,17,17,4,3,0.23529411764705882,0.5714285714285714,0.3333333333333333,0.5294117647058824
8,8,65,65,20,20,0.3076923076923077,0.5,0.380952380952381,0.5
9,9,31,31,8,5,0.25806451612903225,0.6153846153846154,0.36363636363636365,0.5483870967741935
10,10,57,57,14,6,0.24561403508771928,0.7,0.3636363636363636,0.5701754385964912
11,11,48,48,15,8,0.3125,0.6521739130434783,0.4225352112676057,0.5729166666666666
12,12,36,36,9,4,0.25,0.6923076923076923,0.3673469387755102,0.5694444444444444
13,13,17,17,3,5,0.17647058823529413,0.375,0.24,0.4411764705882353
14,14,77,77,5,1,0.06493506493506493,0.8333333333333334,0.12048192771084336,0.525974025974026
15,15,40,40,1,1,0.025,0.5,0.047619047619047616,0.5
16,16,29,29,17,18,0.5862068965517241,0.4857142857142857,0.53125,0.4827586206896552
17,total,1043,1043,160,98,,,,
18,,,,,Micro avg.,0.15340364333652926,0.6201550387596899,0.24596464258262876,0.5297219558964525
19,,,,,Macro avg.,0.17870735365490803,0.5616511578787794,0.24162309790237635,0.5242321618484953
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,10,1,0.034722222222222224,0.9090909090909091,0.06688963210702342,0.515625
3,3,87,87,3,0,0.034482758620689655,1.0,0.06666666666666667,0.5172413793103449
4,4,94,94,1,0,0.010638297872340425,1.0,0.021052631578947368,0.5053191489361702
5,5,62,62,0,0,0.0,0.0,0.0,0.5
6,6,63,63,5,1,0.07936507936507936,0.8333333333333334,0.14492753623188404,0.5317460317460317
7,7,17,17,3,2,0.17647058823529413,0.6,0.2727272727272727,0.5294117647058824
8,8,65,65,3,6,0.046153846153846156,0.3333333333333333,0.08108108108108109,0.47692307692307695
9,9,31,31,2,0,0.06451612903225806,1.0,0.12121212121212122,0.532258064516129
10,10,57,57,1,0,0.017543859649122806,1.0,0.034482758620689655,0.5087719298245614
11,11,48,48,5,1,0.10416666666666667,0.8333333333333334,0.1851851851851852,0.5416666666666666
12,12,36,36,3,1,0.08333333333333333,0.75,0.15,0.5277777777777778
13,13,17,17,0,2,0.0,0.0,0.0,0.4411764705882353
14,14,77,77,0,0,0.0,0.0,0.0,0.5
15,15,40,40,0,0,0.0,0.0,0.0,0.5
16,16,29,29,10,11,0.3448275862068966,0.47619047619047616,0.39999999999999997,0.4827586206896552
17,total,1043,1043,46,25,,,,
18,,,,,Micro avg.,0.04410354745925216,0.647887323943662,0.08258527827648116,0.5100671140939598
19,,,,,Macro avg.,0.05860119807986761,0.5138400814871402,0.09083675796534538,0.5065103489226196
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,4,1,0.13333333333333333,0.8,0.2285714285714286,0.55
2,2,288,288,123,118,0.4270833333333333,0.5103734439834025,0.46502835538752363,0.5086805555555556
3,3,87,87,84,79,0.9655172413793104,0.5153374233128835,0.672,0.5287356321839081
4,4,94,94,68,57,0.723404255319149,0.544,0.6210045662100457,0.5585106382978723
5,5,62,62,11,11,0.1774193548387097,0.5,0.2619047619047619,0.5
6,6,63,63,52,27,0.8253968253968254,0.6582278481012658,0.732394366197183,0.6984126984126984
7,7,17,17,14,14,0.8235294117647058,0.5,0.6222222222222222,0.5
8,8,65,65,52,49,0.8,0.5148514851485149,0.6265060240963856,0.5230769230769231
9,9,31,31,28,29,0.9032258064516129,0.49122807017543857,0.6363636363636364,0.4838709677419355
10,10,57,57,48,37,0.8421052631578947,0.5647058823529412,0.676056338028169,0.5964912280701754
11,11,48,48,46,42,0.9583333333333334,0.5227272727272727,0.6764705882352942,0.5416666666666666
12,12,36,36,36,32,1.0,0.5294117647058824,0.6923076923076924,0.5555555555555556
13,13,17,17,17,12,1.0,0.5862068965517241,0.7391304347826086,0.6470588235294118
14,14,77,77,69,53,0.8961038961038961,0.5655737704918032,0.693467336683417,0.6038961038961039
15,15,40,40,21,19,0.525,0.525,0.525,0.525
16,16,29,29,28,29,0.9655172413793104,0.49122807017543857,0.6511627906976745,0.4827586206896552
17,total,1043,1043,701,609,,,,
18,,,,,Micro avg.,0.6720997123681688,0.5351145038167939,0.5958351041223969,0.5441035474592522
19,,,,,Macro avg.,0.7038805468112596,0.5187571722192098,0.5599759142169437,0.5472773184515566
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,0,0.03333333333333333,1.0,0.06451612903225806,0.5166666666666667
2,2,288,288,100,92,0.3472222222222222,0.5208333333333334,0.4166666666666667,0.5138888888888888
3,3,87,87,78,69,0.896551724137931,0.5306122448979592,0.6666666666666666,0.5517241379310345
4,4,94,94,54,43,0.574468085106383,0.5567010309278351,0.5654450261780105,0.5585106382978723
5,5,62,62,7,6,0.11290322580645161,0.5384615384615384,0.18666666666666668,0.5080645161290323
6,6,63,63,48,15,0.7619047619047619,0.7619047619047619,0.7619047619047619,0.7619047619047619
7,7,17,17,13,12,0.7647058823529411,0.52,0.6190476190476191,0.5294117647058824
8,8,65,65,48,43,0.7384615384615385,0.5274725274725275,0.6153846153846154,0.5384615384615384
9,9,31,31,27,27,0.8709677419354839,0.5,0.6352941176470588,0.5
10,10,57,57,45,31,0.7894736842105263,0.5921052631578947,0.6766917293233082,0.6228070175438597
11,11,48,48,41,35,0.8541666666666666,0.5394736842105263,0.6612903225806451,0.5625
12,12,36,36,35,32,0.9722222222222222,0.5223880597014925,0.6796116504854368,0.5416666666666666
13,13,17,17,16,11,0.9411764705882353,0.5925925925925926,0.7272727272727272,0.6470588235294118
14,14,77,77,63,41,0.8181818181818182,0.6057692307692307,0.6961325966850829,0.6428571428571429
15,15,40,40,12,17,0.3,0.41379310344827586,0.34782608695652173,0.4375
16,16,29,29,28,29,0.9655172413793104,0.49122807017543857,0.6511627906976745,0.4827586206896552
17,total,1043,1043,616,503,,,,
18,,,,,Micro avg.,0.5906040268456376,0.550491510277033,0.5698427382053654,0.5541706615532119
19,,,,,Macro avg.,0.631838624618225,0.5419609082972593,0.5277400101879836,0.553869481427789
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,89,59,0.3090277777777778,0.6013513513513513,0.408256880733945,0.5520833333333334
3,3,87,87,75,65,0.8620689655172413,0.5357142857142857,0.6607929515418501,0.5574712643678161
4,4,94,94,38,26,0.40425531914893614,0.59375,0.48101265822784806,0.5638297872340425
5,5,62,62,6,4,0.0967741935483871,0.6,0.16666666666666666,0.5161290322580645
6,6,63,63,40,10,0.6349206349206349,0.8,0.7079646017699115,0.7380952380952381
7,7,17,17,12,12,0.7058823529411765,0.5,0.5853658536585366,0.5
8,8,65,65,41,39,0.6307692307692307,0.5125,0.5655172413793104,0.5153846153846153
9,9,31,31,27,22,0.8709677419354839,0.5510204081632653,0.6749999999999999,0.5806451612903226
10,10,57,57,36,23,0.631578947368421,0.6101694915254238,0.6206896551724138,0.6140350877192983
11,11,48,48,30,24,0.625,0.5555555555555556,0.5882352941176471,0.5625
12,12,36,36,31,29,0.8611111111111112,0.5166666666666667,0.6458333333333335,0.5277777777777778
13,13,17,17,14,9,0.8235294117647058,0.6086956521739131,0.7,0.6470588235294118
14,14,77,77,49,26,0.6363636363636364,0.6533333333333333,0.6447368421052633,0.6493506493506493
15,15,40,40,10,13,0.25,0.43478260869565216,0.3174603174603175,0.4625
16,16,29,29,27,28,0.9310344827586207,0.4909090909090909,0.6428571428571428,0.4827586206896552
17,total,1043,1043,525,389,,,,
18,,,,,Micro avg.,0.5033557046979866,0.574398249452954,0.5365355135411344,0.5651965484180249
19,,,,,Macro avg.,0.5454872827014919,0.5037910849463846,0.49472879053083446,0.5570364347664838
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,62,36,0.2152777777777778,0.6326530612244898,0.32124352331606215,0.5451388888888888
3,3,87,87,58,44,0.6666666666666666,0.5686274509803921,0.6137566137566137,0.5804597701149425
4,4,94,94,22,8,0.23404255319148937,0.7333333333333333,0.3548387096774194,0.574468085106383
5,5,62,62,2,3,0.03225806451612903,0.4,0.05970149253731343,0.49193548387096775
6,6,63,63,26,5,0.4126984126984127,0.8387096774193549,0.553191489361702,0.6666666666666666
7,7,17,17,9,8,0.5294117647058824,0.5294117647058824,0.5294117647058824,0.5294117647058824
8,8,65,65,27,27,0.4153846153846154,0.5,0.453781512605042,0.5
9,9,31,31,25,17,0.8064516129032258,0.5952380952380952,0.684931506849315,0.6290322580645161
10,10,57,57,25,13,0.43859649122807015,0.6578947368421053,0.5263157894736842,0.6052631578947368
11,11,48,48,20,8,0.4166666666666667,0.7142857142857143,0.5263157894736842,0.625
12,12,36,36,25,17,0.6944444444444444,0.5952380952380952,0.6410256410256411,0.6111111111111112
13,13,17,17,12,6,0.7058823529411765,0.6666666666666666,0.6857142857142857,0.6764705882352942
14,14,77,77,23,16,0.2987012987012987,0.5897435897435898,0.39655172413793105,0.5454545454545454
15,15,40,40,6,7,0.15,0.46153846153846156,0.22641509433962265,0.4875
16,16,29,29,24,25,0.8275862068965517,0.4897959183673469,0.6153846153846154,0.4827586206896552
17,total,1043,1043,366,240,,,,
18,,,,,Micro avg.,0.35091083413231067,0.6039603960396039,0.44390539721043054,0.5604026845637584
19,,,,,Macro avg.,0.4025922899248475,0.5278315626813841,0.4228576207269891,0.5618041729884464