Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-2_Prompt-1_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-2_Prompt-1_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
55a94ea
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,6,0,0.2,1.0,0.33333333333333337,0.6
2,2,288,288,55,42,0.1909722222222222,0.5670103092783505,0.2857142857142857,0.5225694444444444
3,3,87,87,7,5,0.08045977011494253,0.5833333333333334,0.1414141414141414,0.5114942528735632
4,4,94,94,37,27,0.39361702127659576,0.578125,0.4683544303797469,0.5531914893617021
5,5,62,62,7,3,0.11290322580645161,0.7,0.19444444444444445,0.532258064516129
6,6,63,63,26,2,0.4126984126984127,0.9285714285714286,0.5714285714285714,0.6904761904761905
7,7,17,17,2,1,0.11764705882352941,0.6666666666666666,0.2,0.5294117647058824
8,8,65,65,15,14,0.23076923076923078,0.5172413793103449,0.3191489361702128,0.5076923076923077
9,9,31,31,3,4,0.0967741935483871,0.42857142857142855,0.15789473684210525,0.4838709677419355
10,10,57,57,25,14,0.43859649122807015,0.6410256410256411,0.5208333333333334,0.5964912280701754
11,11,48,48,5,3,0.10416666666666667,0.625,0.1785714285714286,0.5208333333333334
12,12,36,36,0,1,0.0,0.0,0.0,0.4861111111111111
13,13,17,17,14,3,0.8235294117647058,0.8235294117647058,0.8235294117647058,0.8235294117647058
14,14,77,77,27,11,0.35064935064935066,0.7105263157894737,0.46956521739130436,0.6038961038961039
15,15,40,40,3,5,0.075,0.375,0.12499999999999999,0.475
16,16,29,29,15,17,0.5172413793103449,0.46875,0.4918032786885246,0.46551724137931033
17,total,1043,1043,247,152,,,,
18,,,,,Micro avg.,0.236816874400767,0.6190476190476191,0.34257975034674065,0.5455417066155321
19,,,,,Macro avg.,0.24382496675758294,0.5654912302536104,0.3106491499691846,0.5530789947862877
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,0,1.0,1.0,1.0,1.0
1,1,30,30,29,26,0.9666666666666667,0.5272727272727272,0.6823529411764705,0.55
2,2,288,288,285,283,0.9895833333333334,0.5017605633802817,0.6658878504672898,0.5034722222222222
3,3,87,87,81,64,0.9310344827586207,0.5586206896551724,0.6982758620689656,0.5977011494252874
4,4,94,94,91,92,0.9680851063829787,0.4972677595628415,0.6570397111913358,0.4946808510638298
5,5,62,62,55,61,0.8870967741935484,0.47413793103448276,0.6179775280898876,0.45161290322580644
6,6,63,63,62,51,0.9841269841269841,0.5486725663716814,0.7045454545454545,0.5873015873015873
7,7,17,17,16,14,0.9411764705882353,0.5333333333333333,0.6808510638297872,0.5588235294117647
8,8,65,65,63,64,0.9692307692307692,0.49606299212598426,0.6562499999999999,0.49230769230769234
9,9,31,31,31,31,1.0,0.5,0.6666666666666666,0.5
10,10,57,57,52,46,0.9122807017543859,0.5306122448979592,0.6709677419354839,0.5526315789473685
11,11,48,48,47,44,0.9791666666666666,0.5164835164835165,0.6762589928057554,0.53125
12,12,36,36,28,29,0.7777777777777778,0.49122807017543857,0.6021505376344085,0.4861111111111111
13,13,17,17,16,11,0.9411764705882353,0.5925925925925926,0.7272727272727272,0.6470588235294118
14,14,77,77,73,61,0.948051948051948,0.5447761194029851,0.6919431279620855,0.577922077922078
15,15,40,40,38,40,0.95,0.48717948717948717,0.6440677966101694,0.475
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,998,946,,,,
18,,,,,Micro avg.,0.9568552253116012,0.5133744855967078,0.6682289922999666,0.524928092042186
19,,,,,Macro avg.,0.9497325971835382,0.5470588584393226,0.6887749805248913,0.5591690309687153
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,24,18,0.8,0.5714285714285714,0.6666666666666666,0.6
2,2,288,288,280,270,0.9722222222222222,0.509090909090909,0.6682577565632458,0.5173611111111112
3,3,87,87,69,49,0.7931034482758621,0.5847457627118644,0.6731707317073171,0.6149425287356322
4,4,94,94,84,83,0.8936170212765957,0.5029940119760479,0.6436781609195402,0.5053191489361702
5,5,62,62,37,44,0.5967741935483871,0.4567901234567901,0.5174825174825175,0.4435483870967742
6,6,63,63,57,39,0.9047619047619048,0.59375,0.7169811320754718,0.6428571428571429
7,7,17,17,15,9,0.8823529411764706,0.625,0.7317073170731708,0.6764705882352942
8,8,65,65,59,63,0.9076923076923077,0.48360655737704916,0.6310160427807486,0.46923076923076923
9,9,31,31,27,26,0.8709677419354839,0.5094339622641509,0.6428571428571428,0.5161290322580645
10,10,57,57,46,41,0.8070175438596491,0.5287356321839081,0.638888888888889,0.543859649122807
11,11,48,48,41,39,0.8541666666666666,0.5125,0.6406249999999999,0.5208333333333334
12,12,36,36,25,28,0.6944444444444444,0.4716981132075472,0.5617977528089889,0.4583333333333333
13,13,17,17,15,8,0.8823529411764706,0.6521739130434783,0.75,0.7058823529411765
14,14,77,77,68,45,0.8831168831168831,0.6017699115044248,0.7157894736842105,0.6493506493506493
15,15,40,40,31,34,0.775,0.47692307692307695,0.5904761904761905,0.4625
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,908,824,,,,
18,,,,,Micro avg.,0.8705656759348035,0.5242494226327945,0.6544144144144144,0.540268456375839
19,,,,,Macro avg.,0.824564132950197,0.5640830867642577,0.6547341203118782,0.5643446709325062
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,18,9,0.6,0.6666666666666666,0.631578947368421,0.65
2,2,288,288,247,231,0.8576388888888888,0.5167364016736402,0.6449086161879896,0.5277777777777778
3,3,87,87,53,29,0.6091954022988506,0.6463414634146342,0.6272189349112427,0.6379310344827587
4,4,94,94,69,70,0.7340425531914894,0.49640287769784175,0.592274678111588,0.4946808510638298
5,5,62,62,22,22,0.3548387096774194,0.5,0.41509433962264153,0.5
6,6,63,63,48,21,0.7619047619047619,0.6956521739130435,0.7272727272727272,0.7142857142857143
7,7,17,17,9,5,0.5294117647058824,0.6428571428571429,0.5806451612903226,0.6176470588235294
8,8,65,65,55,58,0.8461538461538461,0.48672566371681414,0.6179775280898876,0.47692307692307695
9,9,31,31,19,11,0.6129032258064516,0.6333333333333333,0.6229508196721313,0.6290322580645161
10,10,57,57,44,35,0.7719298245614035,0.5569620253164557,0.6470588235294117,0.5789473684210527
11,11,48,48,28,24,0.5833333333333334,0.5384615384615384,0.5599999999999999,0.5416666666666666
12,12,36,36,14,17,0.3888888888888889,0.45161290322580644,0.417910447761194,0.4583333333333333
13,13,17,17,14,6,0.8235294117647058,0.7,0.7567567567567567,0.7352941176470589
14,14,77,77,62,37,0.8051948051948052,0.6262626262626263,0.7045454545454545,0.6623376623376623
15,15,40,40,22,22,0.55,0.5,0.5238095238095238,0.5
16,16,29,29,29,27,1.0,0.5178571428571429,0.6823529411764707,0.5344827586206896
17,total,1043,1043,753,624,,,,
18,,,,,Micro avg.,0.7219558964525408,0.5468409586056645,0.6223140495867769,0.5618408437200384
19,,,,,Macro avg.,0.6369979656688664,0.539757174082158,0.5736679823591625,0.5740788046145686
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,10,1,0.3333333333333333,0.9090909090909091,0.4878048780487804,0.65
2,2,288,288,119,96,0.4131944444444444,0.5534883720930233,0.4731610337972167,0.5399305555555556
3,3,87,87,18,15,0.20689655172413793,0.5454545454545454,0.3,0.5172413793103449
4,4,94,94,49,41,0.5212765957446809,0.5444444444444444,0.532608695652174,0.5425531914893617
5,5,62,62,15,6,0.24193548387096775,0.7142857142857143,0.3614457831325301,0.5725806451612904
6,6,63,63,34,9,0.5396825396825397,0.7906976744186046,0.6415094339622641,0.6984126984126984
7,7,17,17,4,2,0.23529411764705882,0.6666666666666666,0.3478260869565218,0.5588235294117647
8,8,65,65,38,38,0.5846153846153846,0.5,0.5390070921985816,0.5
9,9,31,31,13,4,0.41935483870967744,0.7647058823529411,0.5416666666666666,0.6451612903225806
10,10,57,57,38,23,0.6666666666666666,0.6229508196721312,0.6440677966101694,0.631578947368421
11,11,48,48,17,12,0.3541666666666667,0.5862068965517241,0.4415584415584416,0.5520833333333334
12,12,36,36,5,6,0.1388888888888889,0.45454545454545453,0.21276595744680854,0.4861111111111111
13,13,17,17,14,6,0.8235294117647058,0.7,0.7567567567567567,0.7352941176470589
14,14,77,77,41,27,0.5324675324675324,0.6029411764705882,0.5655172413793104,0.5909090909090909
15,15,40,40,10,14,0.25,0.4166666666666667,0.3125,0.45
16,16,29,29,24,24,0.8275862068965517,0.5,0.6233766233766235,0.5
17,total,1043,1043,449,324,,,,
18,,,,,Micro avg.,0.4304889741131352,0.5808538163001293,0.4944933920704846,0.5599232981783318
19,,,,,Macro avg.,0.41699345077195515,0.580714424865495,0.4577395580907556,0.5688635229430947
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,24,19,0.8,0.5581395348837209,0.6575342465753425,0.5833333333333334
2,2,288,288,284,273,0.9861111111111112,0.5098743267504489,0.672189349112426,0.5190972222222222
3,3,87,87,69,47,0.7931034482758621,0.5948275862068966,0.6798029556650247,0.6264367816091954
4,4,94,94,85,87,0.9042553191489362,0.4941860465116279,0.6390977443609023,0.48936170212765956
5,5,62,62,27,34,0.43548387096774194,0.4426229508196721,0.43902439024390244,0.4435483870967742
6,6,63,63,57,38,0.9047619047619048,0.6,0.7215189873417721,0.6507936507936508
7,7,17,17,14,9,0.8235294117647058,0.6086956521739131,0.7,0.6470588235294118
8,8,65,65,60,61,0.9230769230769231,0.49586776859504134,0.6451612903225807,0.49230769230769234
9,9,31,31,29,24,0.9354838709677419,0.5471698113207547,0.6904761904761905,0.5806451612903226
10,10,57,57,46,41,0.8070175438596491,0.5287356321839081,0.638888888888889,0.543859649122807
11,11,48,48,38,36,0.7916666666666666,0.5135135135135135,0.6229508196721311,0.5208333333333334
12,12,36,36,22,26,0.6111111111111112,0.4583333333333333,0.5238095238095238,0.4444444444444444
13,13,17,17,15,6,0.8823529411764706,0.7142857142857143,0.7894736842105262,0.7647058823529411
14,14,77,77,65,44,0.8441558441558441,0.5963302752293578,0.6989247311827956,0.6363636363636364
15,15,40,40,35,38,0.875,0.4794520547945205,0.6194690265486725,0.4625
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,900,812,,,,
18,,,,,Micro avg.,0.862895493767977,0.5257009345794392,0.6533575317604355,0.5421860019175455
19,,,,,Macro avg.,0.8127711745320393,0.5671784823883778,0.6512738330437655,0.5679582176427899
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,21,14,0.7,0.6,0.6461538461538462,0.6166666666666667
2,2,288,288,278,263,0.9652777777777778,0.5138632162661737,0.6706875753920386,0.5260416666666666
3,3,87,87,59,38,0.6781609195402298,0.6082474226804123,0.6413043478260869,0.6206896551724138
4,4,94,94,79,82,0.8404255319148937,0.4906832298136646,0.6196078431372549,0.48404255319148937
5,5,62,62,22,25,0.3548387096774194,0.46808510638297873,0.4036697247706422,0.47580645161290325
6,6,63,63,52,27,0.8253968253968254,0.6582278481012658,0.732394366197183,0.6984126984126984
7,7,17,17,12,8,0.7058823529411765,0.6,0.6486486486486486,0.6176470588235294
8,8,65,65,58,59,0.8923076923076924,0.49572649572649574,0.6373626373626374,0.49230769230769234
9,9,31,31,23,22,0.7419354838709677,0.5111111111111111,0.6052631578947368,0.5161290322580645
10,10,57,57,43,34,0.7543859649122807,0.5584415584415584,0.6417910447761195,0.5789473684210527
11,11,48,48,31,28,0.6458333333333334,0.5254237288135594,0.5794392523364487,0.53125
12,12,36,36,19,21,0.5277777777777778,0.475,0.5,0.4722222222222222
13,13,17,17,15,6,0.8823529411764706,0.7142857142857143,0.7894736842105262,0.7647058823529411
14,14,77,77,62,36,0.8051948051948052,0.6326530612244898,0.7085714285714286,0.6688311688311688
15,15,40,40,32,35,0.8,0.47761194029850745,0.5981308411214953,0.4625
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,835,727,,,,
18,,,,,Micro avg.,0.800575263662512,0.5345710627400768,0.6410748560460652,0.5517737296260786
19,,,,,Macro avg.,0.712927653871862,0.5193741431262312,0.5934802979450446,0.5603647127611476
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,14,6,0.4666666666666667,0.7,0.56,0.6333333333333333
2,2,288,288,254,240,0.8819444444444444,0.5141700404858299,0.649616368286445,0.5243055555555556
3,3,87,87,49,28,0.5632183908045977,0.6363636363636364,0.5975609756097561,0.6206896551724138
4,4,94,94,66,66,0.7021276595744681,0.5,0.584070796460177,0.5
5,5,62,62,16,17,0.25806451612903225,0.48484848484848486,0.3368421052631579,0.49193548387096775
6,6,63,63,47,14,0.746031746031746,0.7704918032786885,0.7580645161290323,0.7619047619047619
7,7,17,17,9,5,0.5294117647058824,0.6428571428571429,0.5806451612903226,0.6176470588235294
8,8,65,65,51,56,0.7846153846153846,0.4766355140186916,0.5930232558139534,0.46153846153846156
9,9,31,31,19,13,0.6129032258064516,0.59375,0.6031746031746031,0.5967741935483871
10,10,57,57,43,32,0.7543859649122807,0.5733333333333334,0.6515151515151515,0.5964912280701754
11,11,48,48,21,20,0.4375,0.5121951219512195,0.47191011235955055,0.5104166666666666
12,12,36,36,10,15,0.2777777777777778,0.4,0.3278688524590164,0.4305555555555556
13,13,17,17,14,6,0.8235294117647058,0.7,0.7567567567567567,0.7352941176470589
14,14,77,77,55,30,0.7142857142857143,0.6470588235294118,0.6790123456790124,0.6623376623376623
15,15,40,40,26,28,0.65,0.48148148148148145,0.553191489361702,0.475
16,16,29,29,29,26,1.0,0.5272727272727272,0.6904761904761904,0.5517241379310345
17,total,1043,1043,723,602,,,,
18,,,,,Micro avg.,0.6931927133269415,0.5456603773584906,0.6106418918918919,0.5580057526366251
19,,,,,Macro avg.,0.6001448627952443,0.5388504770247441,0.5525722753314604,0.5688204630562097
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,11,2,0.36666666666666664,0.8461538461538461,0.5116279069767441,0.65
2,2,288,288,182,167,0.6319444444444444,0.5214899713467048,0.5714285714285713,0.5260416666666666
3,3,87,87,30,15,0.3448275862068966,0.6666666666666666,0.4545454545454545,0.5862068965517241
4,4,94,94,60,50,0.6382978723404256,0.5454545454545454,0.5882352941176471,0.5531914893617021
5,5,62,62,14,7,0.22580645161290322,0.6666666666666666,0.3373493975903614,0.5564516129032258
6,6,63,63,40,8,0.6349206349206349,0.8333333333333334,0.7207207207207206,0.753968253968254
7,7,17,17,4,3,0.23529411764705882,0.5714285714285714,0.3333333333333333,0.5294117647058824
8,8,65,65,42,47,0.6461538461538462,0.47191011235955055,0.5454545454545455,0.46153846153846156
9,9,31,31,16,6,0.5161290322580645,0.7272727272727273,0.6037735849056604,0.6612903225806451
10,10,57,57,40,29,0.7017543859649122,0.5797101449275363,0.634920634920635,0.5964912280701754
11,11,48,48,16,12,0.3333333333333333,0.5714285714285714,0.4210526315789474,0.5416666666666666
12,12,36,36,5,6,0.1388888888888889,0.45454545454545453,0.21276595744680854,0.4861111111111111
13,13,17,17,14,6,0.8235294117647058,0.7,0.7567567567567567,0.7352941176470589
14,14,77,77,46,20,0.5974025974025974,0.696969696969697,0.6433566433566433,0.6688311688311688
15,15,40,40,12,16,0.3,0.42857142857142855,0.3529411764705882,0.45
16,16,29,29,26,24,0.896551724137931,0.52,0.6582278481012658,0.5344827586206896
17,total,1043,1043,558,418,,,,
18,,,,,Micro avg.,0.5349952061361457,0.5717213114754098,0.5527488855869243,0.5671140939597316
19,,,,,Macro avg.,0.47244123492607704,0.576564808066194,0.49097002692380487,0.5759398540719666