Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-1_Prompt-2_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-1_Prompt-2_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
4306387
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,3,0,0.1,1.0,0.18181818181818182,0.55
2,2,288,288,13,17,0.04513888888888889,0.43333333333333335,0.08176100628930817,0.4930555555555556
3,3,87,87,19,6,0.21839080459770116,0.76,0.3392857142857143,0.5747126436781609
4,4,94,94,20,9,0.2127659574468085,0.6896551724137931,0.3252032520325203,0.5585106382978723
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
6,6,63,63,0,0,0.0,0.0,0.0,0.5
7,7,17,17,2,1,0.11764705882352941,0.6666666666666666,0.2,0.5294117647058824
8,8,65,65,4,2,0.06153846153846154,0.6666666666666666,0.11267605633802817,0.5153846153846153
9,9,31,31,7,6,0.22580645161290322,0.5384615384615384,0.3181818181818182,0.5161290322580645
10,10,57,57,11,7,0.19298245614035087,0.6111111111111112,0.29333333333333333,0.5350877192982456
11,11,48,48,31,22,0.6458333333333334,0.5849056603773585,0.6138613861386139,0.59375
12,12,36,36,2,6,0.05555555555555555,0.25,0.0909090909090909,0.4444444444444444
13,13,17,17,4,3,0.23529411764705882,0.5714285714285714,0.3333333333333333,0.5294117647058824
14,14,77,77,7,1,0.09090909090909091,0.875,0.16470588235294117,0.538961038961039
15,15,40,40,7,9,0.175,0.4375,0.24999999999999994,0.475
16,16,29,29,0,1,0.0,0.0,0.0,0.4827586206896552
17,total,1043,1043,131,90,,,,
18,,,,,Micro avg.,0.12559923298178333,0.5927601809954751,0.20727848101265825,0.5196548418024928
19,,,,,Macro avg.,0.14076418875010274,0.5343958070858259,0.19628324039758324,0.5202754325946147
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,28,19,0.9333333333333333,0.5957446808510638,0.7272727272727273,0.65
2,2,288,288,139,136,0.4826388888888889,0.5054545454545455,0.4937833037300178,0.5052083333333334
3,3,87,87,73,48,0.8390804597701149,0.6033057851239669,0.7019230769230769,0.6436781609195402
4,4,94,94,66,62,0.7021276595744681,0.515625,0.5945945945945946,0.5212765957446809
5,5,62,62,40,44,0.6451612903225806,0.47619047619047616,0.547945205479452,0.46774193548387094
6,6,63,63,55,26,0.873015873015873,0.6790123456790124,0.7638888888888891,0.7301587301587301
7,7,17,17,14,9,0.8235294117647058,0.6086956521739131,0.7,0.6470588235294118
8,8,65,65,53,48,0.8153846153846154,0.5247524752475248,0.6385542168674698,0.5384615384615384
9,9,31,31,26,26,0.8387096774193549,0.5,0.6265060240963854,0.5
10,10,57,57,50,40,0.8771929824561403,0.5555555555555556,0.6802721088435374,0.5877192982456141
11,11,48,48,46,46,0.9583333333333334,0.5,0.6571428571428571,0.5
12,12,36,36,32,35,0.8888888888888888,0.47761194029850745,0.6213592233009708,0.4583333333333333
13,13,17,17,13,13,0.7647058823529411,0.5,0.6046511627906976,0.5
14,14,77,77,50,54,0.6493506493506493,0.4807692307692308,0.5524861878453039,0.474025974025974
15,15,40,40,22,34,0.55,0.39285714285714285,0.45833333333333337,0.35
16,16,29,29,19,26,0.6551724137931034,0.4222222222222222,0.5135135135135135,0.3793103448275862
17,total,1043,1043,727,667,,,,
18,,,,,Micro avg.,0.6970278044103547,0.5215208034433285,0.5966352072219943,0.5287631831255992
19,,,,,Macro avg.,0.7527426682146467,0.5198704148484211,0.6107192014484015,0.5266454745919772
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,21,11,0.7,0.65625,0.6774193548387096,0.6666666666666666
2,2,288,288,69,58,0.23958333333333334,0.5433070866141733,0.3325301204819277,0.5190972222222222
3,3,87,87,55,26,0.632183908045977,0.6790123456790124,0.6547619047619048,0.6666666666666666
4,4,94,94,51,37,0.5425531914893617,0.5795454545454546,0.5604395604395604,0.574468085106383
5,5,62,62,24,22,0.3870967741935484,0.5217391304347826,0.4444444444444444,0.5161290322580645
6,6,63,63,45,14,0.7142857142857143,0.7627118644067796,0.7377049180327869,0.746031746031746
7,7,17,17,11,7,0.6470588235294118,0.6111111111111112,0.6285714285714287,0.6176470588235294
8,8,65,65,39,32,0.6,0.5492957746478874,0.5735294117647058,0.5538461538461539
9,9,31,31,22,19,0.7096774193548387,0.5365853658536586,0.6111111111111112,0.5483870967741935
10,10,57,57,42,25,0.7368421052631579,0.6268656716417911,0.6774193548387096,0.6491228070175439
11,11,48,48,42,42,0.875,0.5,0.6363636363636364,0.5
12,12,36,36,31,31,0.8611111111111112,0.5,0.6326530612244898,0.5
13,13,17,17,9,9,0.5294117647058824,0.5,0.5142857142857143,0.5
14,14,77,77,26,25,0.33766233766233766,0.5098039215686274,0.40625,0.5064935064935064
15,15,40,40,19,26,0.475,0.4222222222222222,0.4470588235294118,0.4125
16,16,29,29,10,12,0.3448275862068966,0.45454545454545453,0.39215686274509803,0.46551724137931033
17,total,1043,1043,517,396,,,,
18,,,,,Micro avg.,0.4956855225311601,0.5662650602409639,0.5286298568507157,0.5580057526366251
19,,,,,Macro avg.,0.5783702393636218,0.585470317839468,0.564315669064724,0.5701513696050577
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,12,5,0.4,0.7058823529411765,0.5106382978723405,0.6166666666666667
2,2,288,288,23,24,0.0798611111111111,0.48936170212765956,0.1373134328358209,0.4982638888888889
3,3,87,87,29,11,0.3333333333333333,0.725,0.45669291338582674,0.603448275862069
4,4,94,94,22,13,0.23404255319148937,0.6285714285714286,0.34108527131782945,0.5478723404255319
5,5,62,62,9,5,0.14516129032258066,0.6428571428571429,0.2368421052631579,0.532258064516129
6,6,63,63,15,1,0.23809523809523808,0.9375,0.37974683544303794,0.6111111111111112
7,7,17,17,7,1,0.4117647058823529,0.875,0.56,0.6764705882352942
8,8,65,65,23,14,0.35384615384615387,0.6216216216216216,0.4509803921568628,0.5692307692307692
9,9,31,31,11,8,0.3548387096774194,0.5789473684210527,0.44000000000000006,0.5483870967741935
10,10,57,57,28,17,0.49122807017543857,0.6222222222222222,0.5490196078431373,0.5964912280701754
11,11,48,48,38,32,0.7916666666666666,0.5428571428571428,0.6440677966101694,0.5625
12,12,36,36,13,11,0.3611111111111111,0.5416666666666666,0.43333333333333335,0.5277777777777778
13,13,17,17,5,5,0.29411764705882354,0.5,0.37037037037037035,0.5
14,14,77,77,13,14,0.16883116883116883,0.48148148148148145,0.25,0.4935064935064935
15,15,40,40,13,20,0.325,0.3939393939393939,0.35616438356164376,0.4125
16,16,29,29,4,3,0.13793103448275862,0.5714285714285714,0.2222222222222222,0.5172413793103449
17,total,1043,1043,265,184,,,,
18,,,,,Micro avg.,0.25407478427612656,0.5902004454342984,0.35522788203753347,0.538830297219559
19,,,,,Macro avg.,0.3012252231638616,0.5799021820667976,0.3728515860126913,0.5478662164926732
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,9,1,0.3,0.9,0.45000000000000007,0.6333333333333333
2,2,288,288,8,3,0.027777777777777776,0.7272727272727273,0.05351170568561872,0.5086805555555556
3,3,87,87,9,2,0.10344827586206896,0.8181818181818182,0.18367346938775508,0.5402298850574713
4,4,94,94,8,1,0.0851063829787234,0.8888888888888888,0.15533980582524273,0.5372340425531915
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
6,6,63,63,3,0,0.047619047619047616,1.0,0.0909090909090909,0.5238095238095238
7,7,17,17,3,0,0.17647058823529413,1.0,0.3,0.5882352941176471
8,8,65,65,11,6,0.16923076923076924,0.6470588235294118,0.2682926829268293,0.5384615384615384
9,9,31,31,4,3,0.12903225806451613,0.5714285714285714,0.2105263157894737,0.5161290322580645
10,10,57,57,18,4,0.3157894736842105,0.8181818181818182,0.4556962025316456,0.6228070175438597
11,11,48,48,31,17,0.6458333333333334,0.6458333333333334,0.6458333333333334,0.6458333333333334
12,12,36,36,6,3,0.16666666666666666,0.6666666666666666,0.26666666666666666,0.5416666666666666
13,13,17,17,2,1,0.11764705882352941,0.6666666666666666,0.2,0.5294117647058824
14,14,77,77,6,7,0.07792207792207792,0.46153846153846156,0.13333333333333333,0.4935064935064935
15,15,40,40,4,4,0.1,0.5,0.16666666666666669,0.5
16,16,29,29,2,1,0.06896551724137931,0.6666666666666666,0.125,0.5172413793103449
17,total,1043,1043,125,53,,,,
18,,,,,Micro avg.,0.11984659635666348,0.702247191011236,0.20475020475020475,0.534515819750719
19,,,,,Macro avg.,0.14986107409985053,0.7046108495502958,0.21983501792951107,0.5438026103730552
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,25,17,0.8333333333333334,0.5952380952380952,0.6944444444444444,0.6333333333333333
2,2,288,288,111,98,0.3854166666666667,0.5311004784688995,0.44668008048289737,0.5225694444444444
3,3,87,87,67,36,0.7701149425287356,0.6504854368932039,0.7052631578947368,0.6781609195402298
4,4,94,94,67,65,0.7127659574468085,0.5075757575757576,0.5929203539823009,0.5106382978723404
5,5,62,62,18,23,0.2903225806451613,0.43902439024390244,0.34951456310679613,0.4596774193548387
6,6,63,63,48,21,0.7619047619047619,0.6956521739130435,0.7272727272727272,0.7142857142857143
7,7,17,17,11,11,0.6470588235294118,0.5,0.5641025641025642,0.5
8,8,65,65,51,38,0.7846153846153846,0.5730337078651685,0.6623376623376623,0.6
9,9,31,31,25,23,0.8064516129032258,0.5208333333333334,0.6329113924050633,0.532258064516129
10,10,57,57,49,32,0.8596491228070176,0.6049382716049383,0.7101449275362319,0.6491228070175439
11,11,48,48,48,44,1.0,0.5217391304347826,0.6857142857142856,0.5416666666666666
12,12,36,36,33,32,0.9166666666666666,0.5076923076923077,0.6534653465346535,0.5138888888888888
13,13,17,17,15,8,0.8823529411764706,0.6521739130434783,0.75,0.7058823529411765
14,14,77,77,38,30,0.4935064935064935,0.5588235294117647,0.5241379310344827,0.551948051948052
15,15,40,40,36,36,0.9,0.5,0.6428571428571429,0.5
16,16,29,29,17,15,0.5862068965517241,0.53125,0.5573770491803278,0.5344827586206896
17,total,1043,1043,660,530,,,,
18,,,,,Micro avg.,0.6327900287631831,0.5546218487394958,0.5911330049261084,0.5623202301054651
19,,,,,Macro avg.,0.7135509520165801,0.5523270897481574,0.6117143311109597,0.567524395260591
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,25,16,0.8333333333333334,0.6097560975609756,0.7042253521126761,0.65
2,2,288,288,93,83,0.3229166666666667,0.5284090909090909,0.4008620689655173,0.5173611111111112
3,3,87,87,61,25,0.7011494252873564,0.7093023255813954,0.7052023121387283,0.7068965517241379
4,4,94,94,62,54,0.6595744680851063,0.5344827586206896,0.5904761904761905,0.5425531914893617
5,5,62,62,14,18,0.22580645161290322,0.4375,0.2978723404255319,0.46774193548387094
6,6,63,63,43,12,0.6825396825396826,0.7818181818181819,0.7288135593220338,0.746031746031746
7,7,17,17,11,8,0.6470588235294118,0.5789473684210527,0.6111111111111113,0.5882352941176471
8,8,65,65,45,27,0.6923076923076923,0.625,0.6569343065693432,0.6384615384615384
9,9,31,31,23,19,0.7419354838709677,0.5476190476190477,0.6301369863013699,0.5645161290322581
10,10,57,57,45,26,0.7894736842105263,0.6338028169014085,0.7031250000000001,0.6666666666666666
11,11,48,48,47,43,0.9791666666666666,0.5222222222222223,0.6811594202898551,0.5416666666666666
12,12,36,36,31,31,0.8611111111111112,0.5,0.6326530612244898,0.5
13,13,17,17,13,7,0.7647058823529411,0.65,0.7027027027027027,0.6764705882352942
14,14,77,77,26,23,0.33766233766233766,0.5306122448979592,0.4126984126984127,0.5194805194805194
15,15,40,40,32,32,0.8,0.5,0.6153846153846154,0.5
16,16,29,29,14,10,0.4827586206896552,0.5833333333333334,0.5283018867924529,0.5689655172413793
17,total,1043,1043,586,435,,,,
18,,,,,Micro avg.,0.5618408437200384,0.5739471106758081,0.5678294573643411,0.5723873441994247
19,,,,,Macro avg.,0.6483235488191975,0.5748709110520798,0.5942152545008842,0.5820616150436586
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,23,13,0.7666666666666667,0.6388888888888888,0.696969696969697,0.6666666666666666
2,2,288,288,66,65,0.22916666666666666,0.5038167938931297,0.315035799522673,0.5017361111111112
3,3,87,87,53,22,0.6091954022988506,0.7066666666666667,0.6543209876543209,0.6781609195402298
4,4,94,94,55,38,0.5851063829787234,0.5913978494623656,0.5882352941176471,0.5904255319148937
5,5,62,62,11,13,0.1774193548387097,0.4583333333333333,0.2558139534883721,0.4838709677419355
6,6,63,63,34,4,0.5396825396825397,0.8947368421052632,0.6732673267326732,0.7380952380952381
7,7,17,17,11,5,0.6470588235294118,0.6875,0.6666666666666667,0.6764705882352942
8,8,65,65,37,19,0.5692307692307692,0.6607142857142857,0.6115702479338843,0.6384615384615384
9,9,31,31,20,17,0.6451612903225806,0.5405405405405406,0.588235294117647,0.5483870967741935
10,10,57,57,39,22,0.6842105263157895,0.639344262295082,0.6610169491525425,0.6491228070175439
11,11,48,48,46,39,0.9583333333333334,0.5411764705882353,0.6917293233082706,0.5729166666666666
12,12,36,36,27,26,0.75,0.5094339622641509,0.6067415730337078,0.5138888888888888
13,13,17,17,11,6,0.6470588235294118,0.6470588235294118,0.6470588235294118,0.6470588235294118
14,14,77,77,22,12,0.2857142857142857,0.6470588235294118,0.3963963963963964,0.564935064935065
15,15,40,40,26,29,0.65,0.4727272727272727,0.5473684210526315,0.4625
16,16,29,29,5,7,0.1724137931034483,0.4166666666666667,0.2439024390243903,0.46551724137931033
17,total,1043,1043,487,338,,,,
18,,,,,Micro avg.,0.4669223394055609,0.5903030303030303,0.5214132762312634,0.5714285714285714
19,,,,,Macro avg.,0.5539069798947757,0.5915330283649824,0.5496664231000549,0.5822478912328228
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,19,9,0.6333333333333333,0.6785714285714286,0.6551724137931035,0.6666666666666666
2,2,288,288,40,42,0.1388888888888889,0.4878048780487805,0.21621621621621626,0.4965277777777778
3,3,87,87,47,14,0.5402298850574713,0.7704918032786885,0.6351351351351352,0.6896551724137931
4,4,94,94,44,27,0.46808510638297873,0.6197183098591549,0.5333333333333332,0.5904255319148937
5,5,62,62,8,5,0.12903225806451613,0.6153846153846154,0.21333333333333332,0.5241935483870968
6,6,63,63,19,1,0.30158730158730157,0.95,0.4578313253012048,0.6428571428571429
7,7,17,17,8,4,0.47058823529411764,0.6666666666666666,0.5517241379310345,0.6176470588235294
8,8,65,65,29,9,0.4461538461538462,0.7631578947368421,0.5631067961165048,0.6538461538461539
9,9,31,31,12,10,0.3870967741935484,0.5454545454545454,0.45283018867924524,0.532258064516129
10,10,57,57,34,17,0.5964912280701754,0.6666666666666666,0.6296296296296297,0.6491228070175439
11,11,48,48,41,31,0.8541666666666666,0.5694444444444444,0.6833333333333332,0.6041666666666666
12,12,36,36,20,19,0.5555555555555556,0.5128205128205128,0.5333333333333333,0.5138888888888888
13,13,17,17,9,5,0.5294117647058824,0.6428571428571429,0.5806451612903226,0.6176470588235294
14,14,77,77,15,5,0.19480519480519481,0.75,0.309278350515464,0.564935064935065
15,15,40,40,24,21,0.6,0.5333333333333333,0.5647058823529412,0.5375
16,16,29,29,1,3,0.034482758620689655,0.25,0.0606060606060606,0.46551724137931033
17,total,1043,1043,371,222,,,,
18,,,,,Micro avg.,0.35570469798657717,0.6256323777403036,0.45354523227383864,0.5714285714285714
19,,,,,Macro avg.,0.4341122821988333,0.6483748377719307,0.4886400763274625,0.5951091085243639