Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-1_Queries-1_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-1_Queries-1_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
79ef3ba
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,0,0.03333333333333333,1.0,0.06451612903225806,0.5166666666666667
2,2,288,288,8,4,0.027777777777777776,0.6666666666666666,0.05333333333333333,0.5069444444444444
3,3,87,87,35,12,0.40229885057471265,0.7446808510638298,0.5223880597014925,0.632183908045977
4,4,94,94,1,1,0.010638297872340425,0.5,0.020833333333333336,0.5
5,5,62,62,3,4,0.04838709677419355,0.42857142857142855,0.08695652173913042,0.49193548387096775
6,6,63,63,7,0,0.1111111111111111,1.0,0.19999999999999998,0.5555555555555556
7,7,17,17,6,2,0.35294117647058826,0.75,0.48,0.6176470588235294
8,8,65,65,7,5,0.1076923076923077,0.5833333333333334,0.18181818181818185,0.5153846153846153
9,9,31,31,0,1,0.0,0.0,0.0,0.4838709677419355
10,10,57,57,13,10,0.22807017543859648,0.5652173913043478,0.32499999999999996,0.5263157894736842
11,11,48,48,9,1,0.1875,0.9,0.31034482758620696,0.5833333333333334
12,12,36,36,2,1,0.05555555555555555,0.6666666666666666,0.10256410256410256,0.5138888888888888
13,13,17,17,3,1,0.17647058823529413,0.75,0.2857142857142857,0.5588235294117647
14,14,77,77,20,22,0.2597402597402597,0.47619047619047616,0.3361344537815126,0.487012987012987
15,15,40,40,11,13,0.275,0.4583333333333333,0.34374999999999994,0.475
16,16,29,29,2,2,0.06896551724137931,0.5,0.1212121212121212,0.5
17,total,1043,1043,128,79,,,,
18,,,,,Micro avg.,0.12272291466922339,0.6183574879227053,0.20479999999999998,0.5234899328859061
19,,,,,Macro avg.,0.13796953222455588,0.5876270674782402,0.20203325587152698,0.5273272487443734
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,2,0.5,0.3333333333333333,0.4,0.25
1,1,30,30,28,27,0.9333333333333333,0.509090909090909,0.6588235294117647,0.5166666666666667
2,2,288,288,272,279,0.9444444444444444,0.49364791288566245,0.6483909415971395,0.4878472222222222
3,3,87,87,84,79,0.9655172413793104,0.5153374233128835,0.672,0.5287356321839081
4,4,94,94,85,77,0.9042553191489362,0.5246913580246914,0.6640625,0.5425531914893617
5,5,62,62,59,61,0.9516129032258065,0.49166666666666664,0.6483516483516484,0.4838709677419355
6,6,63,63,61,58,0.9682539682539683,0.5126050420168067,0.6703296703296703,0.5238095238095238
7,7,17,17,16,16,0.9411764705882353,0.5,0.6530612244897959,0.5
8,8,65,65,64,64,0.9846153846153847,0.5,0.6632124352331606,0.5
9,9,31,31,30,31,0.967741935483871,0.4918032786885246,0.6521739130434782,0.4838709677419355
10,10,57,57,57,55,1.0,0.5089285714285714,0.6745562130177515,0.5175438596491229
11,11,48,48,48,48,1.0,0.5,0.6666666666666666,0.5
12,12,36,36,35,35,0.9722222222222222,0.5,0.660377358490566,0.5
13,13,17,17,17,16,1.0,0.5151515151515151,0.6799999999999999,0.5294117647058824
14,14,77,77,76,76,0.987012987012987,0.5,0.6637554585152838,0.5
15,15,40,40,40,38,1.0,0.5128205128205128,0.6779661016949152,0.525
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,1002,991,,,,
18,,,,,Micro avg.,0.9606903163950143,0.5027596588058204,0.6600790513833992,0.5052732502396932
19,,,,,Macro avg.,0.9423638946887352,0.49465156020118095,0.6482584898534417,0.49348881154179763
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,21,11,0.7,0.65625,0.6774193548387096,0.6666666666666666
2,2,288,288,205,215,0.7118055555555556,0.4880952380952381,0.57909604519774,0.4826388888888889
3,3,87,87,81,65,0.9310344827586207,0.5547945205479452,0.6952789699570815,0.5919540229885057
4,4,94,94,34,47,0.3617021276595745,0.41975308641975306,0.38857142857142857,0.4308510638297872
5,5,62,62,51,59,0.8225806451612904,0.4636363636363636,0.5930232558139535,0.43548387096774194
6,6,63,63,56,33,0.8888888888888888,0.6292134831460674,0.7368421052631579,0.6825396825396826
7,7,17,17,15,14,0.8823529411764706,0.5172413793103449,0.6521739130434783,0.5294117647058824
8,8,65,65,63,55,0.9692307692307692,0.5338983050847458,0.6885245901639344,0.5615384615384615
9,9,31,31,21,19,0.6774193548387096,0.525,0.5915492957746479,0.532258064516129
10,10,57,57,53,52,0.9298245614035088,0.5047619047619047,0.6543209876543209,0.5087719298245614
11,11,48,48,47,48,0.9791666666666666,0.49473684210526314,0.6573426573426573,0.4895833333333333
12,12,36,36,28,27,0.7777777777777778,0.509090909090909,0.6153846153846153,0.5138888888888888
13,13,17,17,16,12,0.9411764705882353,0.5714285714285714,0.7111111111111111,0.6176470588235294
14,14,77,77,76,75,0.987012987012987,0.5033112582781457,0.6666666666666667,0.5064935064935064
15,15,40,40,32,34,0.8,0.48484848484848486,0.6037735849056605,0.475
16,16,29,29,17,20,0.5862068965517241,0.4594594594594595,0.5151515151515151,0.4482758620689655
17,total,1043,1043,816,786,,,,
18,,,,,Micro avg.,0.7823585810162992,0.5093632958801498,0.6170132325141776,0.5143815915627996
19,,,,,Macro avg.,0.7615400073688694,0.4891482238948939,0.5897782409906281,0.52782370976909
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,9,2,0.3,0.8181818181818182,0.43902439024390244,0.6166666666666667
2,2,288,288,68,80,0.2361111111111111,0.4594594594594595,0.3119266055045871,0.4791666666666667
3,3,87,87,60,37,0.6896551724137931,0.6185567010309279,0.6521739130434783,0.632183908045977
4,4,94,94,6,7,0.06382978723404255,0.46153846153846156,0.11214953271028037,0.4946808510638298
5,5,62,62,35,46,0.5645161290322581,0.43209876543209874,0.4895104895104895,0.4112903225806452
6,6,63,63,46,8,0.7301587301587301,0.8518518518518519,0.7863247863247863,0.8015873015873016
7,7,17,17,12,10,0.7058823529411765,0.5454545454545454,0.6153846153846153,0.5588235294117647
8,8,65,65,45,39,0.6923076923076923,0.5357142857142857,0.6040268456375838,0.5461538461538461
9,9,31,31,8,4,0.25806451612903225,0.6666666666666666,0.37209302325581395,0.5645161290322581
10,10,57,57,46,45,0.8070175438596491,0.5054945054945055,0.6216216216216216,0.5087719298245614
11,11,48,48,38,38,0.7916666666666666,0.5,0.6129032258064516,0.5
12,12,36,36,14,8,0.3888888888888889,0.6363636363636364,0.4827586206896552,0.5833333333333334
13,13,17,17,15,9,0.8823529411764706,0.625,0.7317073170731708,0.6764705882352942
14,14,77,77,67,66,0.8701298701298701,0.5037593984962406,0.638095238095238,0.5064935064935064
15,15,40,40,16,25,0.4,0.3902439024390244,0.39506172839506176,0.3875
16,16,29,29,7,11,0.2413793103448276,0.3888888888888889,0.2978723404255319,0.43103448275862066
17,total,1043,1043,492,435,,,,
18,,,,,Micro avg.,0.47171620325982744,0.5307443365695793,0.4994923857868021,0.5273250239693192
19,,,,,Macro avg.,0.5071741595526005,0.5258395815889655,0.480154958454251,0.5410984154031924
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,3,0,0.1,1.0,0.18181818181818182,0.55
2,2,288,288,3,2,0.010416666666666666,0.6,0.020477815699658702,0.5017361111111112
3,3,87,87,9,1,0.10344827586206896,0.9,0.18556701030927833,0.5459770114942529
4,4,94,94,0,0,0.0,0.0,0.0,0.5
5,5,62,62,2,3,0.03225806451612903,0.4,0.05970149253731343,0.49193548387096775
6,6,63,63,11,0,0.1746031746031746,1.0,0.29729729729729726,0.5873015873015873
7,7,17,17,3,0,0.17647058823529413,1.0,0.3,0.5882352941176471
8,8,65,65,16,9,0.24615384615384617,0.64,0.35555555555555557,0.5538461538461539
9,9,31,31,0,0,0.0,0.0,0.0,0.5
10,10,57,57,14,9,0.24561403508771928,0.6086956521739131,0.35,0.543859649122807
11,11,48,48,19,4,0.3958333333333333,0.8260869565217391,0.5352112676056338,0.65625
12,12,36,36,2,1,0.05555555555555555,0.6666666666666666,0.10256410256410256,0.5138888888888888
13,13,17,17,5,3,0.29411764705882354,0.625,0.4,0.5588235294117647
14,14,77,77,27,25,0.35064935064935066,0.5192307692307693,0.4186046511627907,0.512987012987013
15,15,40,40,1,5,0.025,0.16666666666666666,0.04347826086956522,0.45
16,16,29,29,2,2,0.06896551724137931,0.5,0.1212121212121212,0.5
17,total,1043,1043,117,64,,,,
18,,,,,Micro avg.,0.11217641418983701,0.6464088397790055,0.19117647058823528,0.5254074784276127
19,,,,,Macro avg.,0.13406388558607887,0.5560203947799857,0.19832280921361758,0.5326376895383643
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,15,7,0.5,0.6818181818181818,0.576923076923077,0.6333333333333333
2,2,288,288,188,181,0.6527777777777778,0.5094850948509485,0.5722983257229832,0.5121527777777778
3,3,87,87,76,66,0.8735632183908046,0.5352112676056338,0.6637554585152838,0.5574712643678161
4,4,94,94,44,44,0.46808510638297873,0.5,0.4835164835164835,0.5
5,5,62,62,43,49,0.6935483870967742,0.4673913043478261,0.5584415584415585,0.45161290322580644
6,6,63,63,53,24,0.8412698412698413,0.6883116883116883,0.7571428571428571,0.7301587301587301
7,7,17,17,16,16,0.9411764705882353,0.5,0.6530612244897959,0.5
8,8,65,65,63,56,0.9692307692307692,0.5294117647058824,0.6847826086956521,0.5538461538461539
9,9,31,31,23,18,0.7419354838709677,0.5609756097560976,0.638888888888889,0.5806451612903226
10,10,57,57,54,54,0.9473684210526315,0.5,0.6545454545454545,0.5
11,11,48,48,47,42,0.9791666666666666,0.5280898876404494,0.6861313868613137,0.5520833333333334
12,12,36,36,21,12,0.5833333333333334,0.6363636363636364,0.6086956521739131,0.625
13,13,17,17,17,8,1.0,0.68,0.8095238095238095,0.7647058823529411
14,14,77,77,77,75,1.0,0.506578947368421,0.6724890829694323,0.512987012987013
15,15,40,40,40,38,1.0,0.5128205128205128,0.6779661016949152,0.525
16,16,29,29,14,19,0.4827586206896552,0.42424242424242425,0.45161290322580644,0.41379310344827586
17,total,1043,1043,791,709,,,,
18,,,,,Micro avg.,0.7583892617449665,0.5273333333333333,0.6220998820290995,0.5393096836049857
19,,,,,Macro avg.,0.7455420056676727,0.5153353129312767,0.5970455807841897,0.5536935091836179
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,13,4,0.43333333333333335,0.7647058823529411,0.5531914893617021,0.65
2,2,288,288,137,114,0.4756944444444444,0.545816733067729,0.5083487940630798,0.5399305555555556
3,3,87,87,73,55,0.8390804597701149,0.5703125,0.6790697674418604,0.603448275862069
4,4,94,94,30,32,0.3191489361702128,0.4838709677419355,0.38461538461538464,0.48936170212765956
5,5,62,62,33,36,0.532258064516129,0.4782608695652174,0.5038167938931298,0.47580645161290325
6,6,63,63,46,10,0.7301587301587301,0.8214285714285714,0.773109243697479,0.7857142857142857
7,7,17,17,16,16,0.9411764705882353,0.5,0.6530612244897959,0.5
8,8,65,65,57,50,0.8769230769230769,0.5327102803738317,0.6627906976744186,0.5538461538461539
9,9,31,31,20,9,0.6451612903225806,0.6896551724137931,0.6666666666666667,0.6774193548387096
10,10,57,57,53,50,0.9298245614035088,0.5145631067961165,0.6625,0.5263157894736842
11,11,48,48,45,37,0.9375,0.5487804878048781,0.6923076923076923,0.5833333333333334
12,12,36,36,11,7,0.3055555555555556,0.6111111111111112,0.40740740740740744,0.5555555555555556
13,13,17,17,16,7,0.9411764705882353,0.6956521739130435,0.7999999999999999,0.7647058823529411
14,14,77,77,77,73,1.0,0.5133333333333333,0.6784140969162996,0.525974025974026
15,15,40,40,38,37,0.95,0.5066666666666667,0.6608695652173914,0.5125
16,16,29,29,9,17,0.3103448275862069,0.34615384615384615,0.32727272727272727,0.3620689655172414
17,total,1043,1043,674,554,,,,
18,,,,,Micro avg.,0.6462128475551294,0.5488599348534202,0.5935711140466755,0.5575263662511984
19,,,,,Macro avg.,0.6569021306682565,0.536648335454295,0.5654965618250021,0.5650576665743599
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,10,4,0.3333333333333333,0.7142857142857143,0.4545454545454545,0.6
2,2,288,288,86,52,0.2986111111111111,0.6231884057971014,0.40375586854460094,0.5590277777777778
3,3,87,87,69,49,0.7931034482758621,0.5847457627118644,0.6731707317073171,0.6149425287356322
4,4,94,94,15,20,0.1595744680851064,0.42857142857142855,0.23255813953488372,0.4734042553191489
5,5,62,62,10,20,0.16129032258064516,0.3333333333333333,0.21739130434782605,0.41935483870967744
6,6,63,63,30,2,0.47619047619047616,0.9375,0.631578947368421,0.7222222222222222
7,7,17,17,16,14,0.9411764705882353,0.5333333333333333,0.6808510638297872,0.5588235294117647
8,8,65,65,39,35,0.6,0.527027027027027,0.5611510791366906,0.5307692307692308
9,9,31,31,13,3,0.41935483870967744,0.8125,0.5531914893617021,0.6612903225806451
10,10,57,57,50,44,0.8771929824561403,0.5319148936170213,0.662251655629139,0.5526315789473685
11,11,48,48,34,19,0.7083333333333334,0.6415094339622641,0.6732673267326733,0.65625
12,12,36,36,10,5,0.2777777777777778,0.6666666666666666,0.39215686274509803,0.5694444444444444
13,13,17,17,15,6,0.8823529411764706,0.7142857142857143,0.7894736842105262,0.7647058823529411
14,14,77,77,69,68,0.8961038961038961,0.5036496350364964,0.6448598130841121,0.5064935064935064
15,15,40,40,37,35,0.925,0.5138888888888888,0.6607142857142857,0.525
16,16,29,29,5,10,0.1724137931034483,0.3333333333333333,0.22727272727272724,0.41379310344827586
17,total,1043,1043,508,386,,,,
18,,,,,Micro avg.,0.48705656759348037,0.5682326621923938,0.5245224574083636,0.5584851390220518
19,,,,,Macro avg.,0.5248123054603243,0.5529255041676581,0.4975406137508968,0.5663619541889785
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,5,1,0.16666666666666666,0.8333333333333334,0.2777777777777778,0.5666666666666667
2,2,288,288,44,20,0.1527777777777778,0.6875,0.25000000000000006,0.5416666666666666
3,3,87,87,62,34,0.7126436781609196,0.6458333333333334,0.6775956284153006,0.6609195402298851
4,4,94,94,5,6,0.05319148936170213,0.45454545454545453,0.09523809523809525,0.4946808510638298
5,5,62,62,5,6,0.08064516129032258,0.45454545454545453,0.136986301369863,0.49193548387096775
6,6,63,63,13,0,0.20634920634920634,1.0,0.34210526315789475,0.6031746031746031
7,7,17,17,13,12,0.7647058823529411,0.52,0.6190476190476191,0.5294117647058824
8,8,65,65,22,17,0.3384615384615385,0.5641025641025641,0.4230769230769231,0.5384615384615384
9,9,31,31,7,2,0.22580645161290322,0.7777777777777778,0.35000000000000003,0.5806451612903226
10,10,57,57,31,27,0.543859649122807,0.5344827586206896,0.5391304347826087,0.5350877192982456
11,11,48,48,19,8,0.3958333333333333,0.7037037037037037,0.5066666666666667,0.6145833333333334
12,12,36,36,4,3,0.1111111111111111,0.5714285714285714,0.18604651162790697,0.5138888888888888
13,13,17,17,13,4,0.7647058823529411,0.7647058823529411,0.7647058823529412,0.7647058823529411
14,14,77,77,53,58,0.6883116883116883,0.4774774774774775,0.5638297872340425,0.4675324675324675
15,15,40,40,29,34,0.725,0.4603174603174603,0.5631067961165048,0.4375
16,16,29,29,3,5,0.10344827586206896,0.375,0.16216216216216217,0.46551724137931033
17,total,1043,1043,328,237,,,,
18,,,,,Micro avg.,0.3144774688398849,0.5805309734513274,0.4079601990049751,0.5436241610738255
19,,,,,Macro avg.,0.3549128113016428,0.5779266924434565,0.3798515205309592,0.5474339887597381