Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-1_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-1_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
21bd6f9
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,0,0.03333333333333333,1.0,0.06451612903225806,0.5166666666666667
2,2,288,288,2,1,0.006944444444444444,0.6666666666666666,0.013745704467353952,0.5017361111111112
3,3,87,87,9,1,0.10344827586206896,0.9,0.18556701030927833,0.5459770114942529
4,4,94,94,5,0,0.05319148936170213,1.0,0.10101010101010102,0.526595744680851
5,5,62,62,0,0,0.0,0.0,0.0,0.5
6,6,63,63,0,0,0.0,0.0,0.0,0.5
7,7,17,17,0,0,0.0,0.0,0.0,0.5
8,8,65,65,3,2,0.046153846153846156,0.6,0.08571428571428573,0.5076923076923077
9,9,31,31,2,3,0.06451612903225806,0.4,0.1111111111111111,0.4838709677419355
10,10,57,57,2,0,0.03508771929824561,1.0,0.06779661016949151,0.5175438596491229
11,11,48,48,39,25,0.8125,0.609375,0.6964285714285714,0.6458333333333334
12,12,36,36,1,3,0.027777777777777776,0.25,0.049999999999999996,0.4722222222222222
13,13,17,17,2,0,0.11764705882352941,1.0,0.21052631578947367,0.5588235294117647
14,14,77,77,4,1,0.05194805194805195,0.8,0.0975609756097561,0.5194805194805194
15,15,40,40,15,12,0.375,0.5555555555555556,0.44776119402985076,0.5375
16,16,29,29,1,0,0.034482758620689655,1.0,0.06666666666666667,0.5172413793103449
17,total,1043,1043,86,48,,,,
18,,,,,Micro avg.,0.08245445829338446,0.6417910447761194,0.1461342395921835,0.5182166826462129
19,,,,,Macro avg.,0.10364887556799691,0.5753880718954248,0.12931792207871756,0.5206578619290844
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,2,0.5,0.3333333333333333,0.4,0.25
1,1,30,30,23,19,0.7666666666666667,0.5476190476190477,0.6388888888888888,0.5666666666666667
2,2,288,288,148,156,0.5138888888888888,0.4868421052631579,0.5,0.4861111111111111
3,3,87,87,86,81,0.9885057471264368,0.5149700598802395,0.6771653543307087,0.5287356321839081
4,4,94,94,92,91,0.9787234042553191,0.5027322404371585,0.6642599277978339,0.5053191489361702
5,5,62,62,48,48,0.7741935483870968,0.5,0.6075949367088608,0.5
6,6,63,63,60,45,0.9523809523809523,0.5714285714285714,0.7142857142857142,0.6190476190476191
7,7,17,17,17,14,1.0,0.5483870967741935,0.7083333333333333,0.5882352941176471
8,8,65,65,63,61,0.9692307692307692,0.5080645161290323,0.6666666666666667,0.5153846153846153
9,9,31,31,29,31,0.9354838709677419,0.48333333333333334,0.6373626373626374,0.46774193548387094
10,10,57,57,53,49,0.9298245614035088,0.5196078431372549,0.6666666666666666,0.5350877192982456
11,11,48,48,47,48,0.9791666666666666,0.49473684210526314,0.6573426573426573,0.4895833333333333
12,12,36,36,35,36,0.9722222222222222,0.49295774647887325,0.6542056074766356,0.4861111111111111
13,13,17,17,12,15,0.7058823529411765,0.4444444444444444,0.5454545454545455,0.4117647058823529
14,14,77,77,67,64,0.8701298701298701,0.5114503816793893,0.6442307692307692,0.5194805194805194
15,15,40,40,39,39,0.975,0.5,0.6610169491525423,0.5
16,16,29,29,28,29,0.9655172413793104,0.49122807017543857,0.6511627906976745,0.4827586206896552
17,total,1043,1043,848,828,,,,
18,,,,,Micro avg.,0.8130393096836049,0.5059665871121718,0.6237587348289811,0.5095877277085331
19,,,,,Macro avg.,0.8692245154498014,0.49712562542463123,0.6290963203174196,0.49717811957216623
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,17,6,0.5666666666666667,0.7391304347826086,0.6415094339622641,0.6833333333333333
2,2,288,288,55,61,0.1909722222222222,0.47413793103448276,0.27227722772277224,0.4895833333333333
3,3,87,87,80,55,0.9195402298850575,0.5925925925925926,0.7207207207207207,0.6436781609195402
4,4,94,94,76,78,0.8085106382978723,0.4935064935064935,0.6129032258064516,0.48936170212765956
5,5,62,62,16,15,0.25806451612903225,0.5161290322580645,0.3440860215053763,0.5080645161290323
6,6,63,63,29,18,0.4603174603174603,0.6170212765957447,0.5272727272727272,0.5873015873015873
7,7,17,17,15,12,0.8823529411764706,0.5555555555555556,0.6818181818181819,0.5882352941176471
8,8,65,65,57,51,0.8769230769230769,0.5277777777777778,0.6589595375722545,0.5461538461538461
9,9,31,31,26,31,0.8387096774193549,0.45614035087719296,0.5909090909090909,0.41935483870967744
10,10,57,57,36,24,0.631578947368421,0.6,0.6153846153846154,0.6052631578947368
11,11,48,48,45,47,0.9375,0.4891304347826087,0.6428571428571429,0.4791666666666667
12,12,36,36,33,34,0.9166666666666666,0.4925373134328358,0.6407766990291262,0.4861111111111111
13,13,17,17,12,14,0.7058823529411765,0.46153846153846156,0.558139534883721,0.4411764705882353
14,14,77,77,40,40,0.5194805194805194,0.5,0.5095541401273885,0.5
15,15,40,40,36,38,0.9,0.4864864864864865,0.631578947368421,0.475
16,16,29,29,25,26,0.8620689655172413,0.49019607843137253,0.625,0.4827586206896552
17,total,1043,1043,599,550,,,,
18,,,,,Micro avg.,0.5743048897411314,0.5213228894691035,0.5465328467153284,0.5234899328859061
19,,,,,Macro avg.,0.6926608753536022,0.5583458952736633,0.5847302302121719,0.53967897876918
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,7,1,0.23333333333333334,0.875,0.3684210526315789,0.6
2,2,288,288,13,13,0.04513888888888889,0.5,0.08280254777070065,0.5
3,3,87,87,33,15,0.3793103448275862,0.6875,0.4888888888888889,0.603448275862069
4,4,94,94,19,8,0.20212765957446807,0.7037037037037037,0.31404958677685946,0.5585106382978723
5,5,62,62,3,1,0.04838709677419355,0.75,0.0909090909090909,0.5161290322580645
6,6,63,63,3,2,0.047619047619047616,0.6,0.08823529411764705,0.5079365079365079
7,7,17,17,4,1,0.23529411764705882,0.8,0.3636363636363636,0.5882352941176471
8,8,65,65,36,29,0.5538461538461539,0.5538461538461539,0.5538461538461539,0.5538461538461539
9,9,31,31,15,15,0.4838709677419355,0.5,0.4918032786885246,0.5
10,10,57,57,19,12,0.3333333333333333,0.6129032258064516,0.43181818181818177,0.5614035087719298
11,11,48,48,44,34,0.9166666666666666,0.5641025641025641,0.6984126984126983,0.6041666666666666
12,12,36,36,11,12,0.3055555555555556,0.4782608695652174,0.3728813559322034,0.4861111111111111
13,13,17,17,5,10,0.29411764705882354,0.3333333333333333,0.3125,0.35294117647058826
14,14,77,77,13,20,0.16883116883116883,0.3939393939393939,0.23636363636363636,0.45454545454545453
15,15,40,40,24,33,0.6,0.42105263157894735,0.4948453608247421,0.3875
16,16,29,29,11,12,0.3793103448275862,0.4782608695652174,0.4230769230769231,0.4827586206896552
17,total,1043,1043,260,218,,,,
18,,,,,Micro avg.,0.24928092042186,0.5439330543933054,0.3418803418803419,0.5201342281879194
19,,,,,Macro avg.,0.3074554309721058,0.5442295732612342,0.34191120080554077,0.5151489670925717
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,3,0,0.1,1.0,0.18181818181818182,0.55
2,2,288,288,2,4,0.006944444444444444,0.3333333333333333,0.01360544217687075,0.4965277777777778
3,3,87,87,5,3,0.05747126436781609,0.625,0.10526315789473682,0.5114942528735632
4,4,94,94,3,0,0.031914893617021274,1.0,0.06185567010309278,0.5159574468085106
5,5,62,62,0,0,0.0,0.0,0.0,0.5
6,6,63,63,0,0,0.0,0.0,0.0,0.5
7,7,17,17,0,0,0.0,0.0,0.0,0.5
8,8,65,65,10,3,0.15384615384615385,0.7692307692307693,0.2564102564102564,0.5538461538461539
9,9,31,31,1,1,0.03225806451612903,0.5,0.06060606060606061,0.5
10,10,57,57,7,1,0.12280701754385964,0.875,0.2153846153846154,0.5526315789473685
11,11,48,48,33,22,0.6875,0.6,0.6407766990291262,0.6145833333333334
12,12,36,36,2,2,0.05555555555555555,0.5,0.09999999999999999,0.5
13,13,17,17,2,0,0.11764705882352941,1.0,0.21052631578947367,0.5588235294117647
14,14,77,77,4,2,0.05194805194805195,0.6666666666666666,0.0963855421686747,0.512987012987013
15,15,40,40,5,2,0.125,0.7142857142857143,0.2127659574468085,0.5375
16,16,29,29,2,0,0.06896551724137931,1.0,0.12903225806451613,0.5344827586206896
17,total,1043,1043,79,40,,,,
18,,,,,Micro avg.,0.07574304889741132,0.6638655462184874,0.1359724612736661,0.5186960690316395
19,,,,,Macro avg.,0.09481517775905532,0.5637362637362637,0.13437824452308317,0.525813755565069
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,2,0.5,0.3333333333333333,0.4,0.25
1,1,30,30,16,5,0.5333333333333333,0.7619047619047619,0.6274509803921569,0.6833333333333333
2,2,288,288,37,43,0.1284722222222222,0.4625,0.2010869565217391,0.4895833333333333
3,3,87,87,72,43,0.8275862068965517,0.6260869565217392,0.7128712871287128,0.6666666666666666
4,4,94,94,66,59,0.7021276595744681,0.528,0.6027397260273973,0.5372340425531915
5,5,62,62,10,7,0.16129032258064516,0.5882352941176471,0.25316455696202533,0.5241935483870968
6,6,63,63,15,3,0.23809523809523808,0.8333333333333334,0.37037037037037035,0.5952380952380952
7,7,17,17,13,11,0.7647058823529411,0.5416666666666666,0.6341463414634146,0.5588235294117647
8,8,65,65,55,49,0.8461538461538461,0.5288461538461539,0.650887573964497,0.5461538461538461
9,9,31,31,26,29,0.8387096774193549,0.4727272727272727,0.6046511627906976,0.45161290322580644
10,10,57,57,33,17,0.5789473684210527,0.66,0.616822429906542,0.6403508771929824
11,11,48,48,47,47,0.9791666666666666,0.5,0.6619718309859155,0.5
12,12,36,36,32,33,0.8888888888888888,0.49230769230769234,0.6336633663366337,0.4861111111111111
13,13,17,17,11,13,0.6470588235294118,0.4583333333333333,0.5365853658536585,0.4411764705882353
14,14,77,77,30,33,0.38961038961038963,0.47619047619047616,0.42857142857142855,0.4805194805194805
15,15,40,40,38,38,0.95,0.5,0.6551724137931034,0.5
16,16,29,29,20,19,0.6896551724137931,0.5128205128205128,0.5882352941176471,0.5172413793103449
17,total,1043,1043,522,451,,,,
18,,,,,Micro avg.,0.5004793863854267,0.5364850976361768,0.5178571428571428,0.5340364333652924
19,,,,,Macro avg.,0.6272824528328707,0.5456638698295837,0.5399053579521141,0.5216610951191346
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,12,4,0.4,0.75,0.5217391304347827,0.6333333333333333
2,2,288,288,21,27,0.07291666666666667,0.4375,0.12500000000000003,0.4895833333333333
3,3,87,87,58,31,0.6666666666666666,0.651685393258427,0.6590909090909091,0.6551724137931034
4,4,94,94,50,42,0.5319148936170213,0.5434782608695652,0.5376344086021505,0.5425531914893617
5,5,62,62,4,2,0.06451612903225806,0.6666666666666666,0.1176470588235294,0.5161290322580645
6,6,63,63,4,0,0.06349206349206349,1.0,0.11940298507462686,0.5317460317460317
7,7,17,17,11,9,0.6470588235294118,0.55,0.5945945945945946,0.5588235294117647
8,8,65,65,53,46,0.8153846153846154,0.5353535353535354,0.6463414634146342,0.5538461538461539
9,9,31,31,19,27,0.6129032258064516,0.41304347826086957,0.49350649350649356,0.3709677419354839
10,10,57,57,25,17,0.43859649122807015,0.5952380952380952,0.505050505050505,0.5701754385964912
11,11,48,48,47,45,0.9791666666666666,0.5108695652173914,0.6714285714285714,0.5208333333333334
12,12,36,36,24,29,0.6666666666666666,0.4528301886792453,0.5393258426966292,0.4305555555555556
13,13,17,17,11,12,0.6470588235294118,0.4782608695652174,0.55,0.47058823529411764
14,14,77,77,25,22,0.3246753246753247,0.5319148936170213,0.40322580645161293,0.5194805194805194
15,15,40,40,38,36,0.95,0.5135135135135135,0.6666666666666667,0.525
16,16,29,29,20,16,0.6896551724137931,0.5555555555555556,0.6153846153846154,0.5689655172413793
17,total,1043,1043,422,365,,,,
18,,,,,Micro avg.,0.4046021093000959,0.5362134688691232,0.46120218579234973,0.5273250239693192
19,,,,,Macro avg.,0.5041571899632405,0.5403476479879473,0.45682582654237197,0.5269266682734133
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,9,1,0.3,0.9,0.45000000000000007,0.6333333333333333
2,2,288,288,17,15,0.059027777777777776,0.53125,0.10624999999999998,0.5034722222222222
3,3,87,87,43,18,0.4942528735632184,0.7049180327868853,0.581081081081081,0.6436781609195402
4,4,94,94,35,21,0.3723404255319149,0.625,0.4666666666666666,0.574468085106383
5,5,62,62,1,2,0.016129032258064516,0.3333333333333333,0.03076923076923077,0.49193548387096775
6,6,63,63,2,0,0.031746031746031744,1.0,0.06153846153846154,0.5158730158730159
7,7,17,17,7,3,0.4117647058823529,0.7,0.5185185185185185,0.6176470588235294
8,8,65,65,45,36,0.6923076923076923,0.5555555555555556,0.6164383561643836,0.5692307692307692
9,9,31,31,11,21,0.3548387096774194,0.34375,0.3492063492063492,0.3387096774193548
10,10,57,57,18,13,0.3157894736842105,0.5806451612903226,0.4090909090909091,0.543859649122807
11,11,48,48,45,39,0.9375,0.5357142857142857,0.6818181818181818,0.5625
12,12,36,36,22,23,0.6111111111111112,0.4888888888888889,0.5432098765432098,0.4861111111111111
13,13,17,17,9,9,0.5294117647058824,0.5,0.5142857142857143,0.5
14,14,77,77,14,15,0.18181818181818182,0.4827586206896552,0.2641509433962264,0.4935064935064935
15,15,40,40,34,33,0.85,0.5074626865671642,0.6355140186915889,0.5125
16,16,29,29,12,12,0.41379310344827586,0.5,0.4528301886792453,0.5
17,total,1043,1043,324,261,,,,
18,,,,,Micro avg.,0.31064237775647174,0.5538461538461539,0.39803439803439805,0.5302013422818792
19,,,,,Macro avg.,0.3865782872654196,0.5464280332250642,0.39302167626175105,0.5286367682670311
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,5,1,0.16666666666666666,0.8333333333333334,0.2777777777777778,0.5666666666666667
2,2,288,288,8,4,0.027777777777777776,0.6666666666666666,0.05333333333333333,0.5069444444444444
3,3,87,87,25,6,0.28735632183908044,0.8064516129032258,0.42372881355932196,0.6091954022988506
4,4,94,94,18,12,0.19148936170212766,0.6,0.29032258064516125,0.5319148936170213
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
6,6,63,63,0,0,0.0,0.0,0.0,0.5
7,7,17,17,2,1,0.11764705882352941,0.6666666666666666,0.2,0.5294117647058824
8,8,65,65,30,16,0.46153846153846156,0.6521739130434783,0.5405405405405406,0.6076923076923076
9,9,31,31,7,14,0.22580645161290322,0.3333333333333333,0.2692307692307692,0.3870967741935484
10,10,57,57,12,5,0.21052631578947367,0.7058823529411765,0.3243243243243243,0.5614035087719298
11,11,48,48,44,35,0.9166666666666666,0.5569620253164557,0.6929133858267716,0.59375
12,12,36,36,9,13,0.25,0.4090909090909091,0.31034482758620685,0.4444444444444444
13,13,17,17,5,6,0.29411764705882354,0.45454545454545453,0.35714285714285715,0.47058823529411764
14,14,77,77,8,7,0.1038961038961039,0.5333333333333333,0.17391304347826086,0.5064935064935064
15,15,40,40,25,29,0.625,0.46296296296296297,0.5319148936170213,0.45
16,16,29,29,3,3,0.10344827586206896,0.5,0.17142857142857143,0.5
17,total,1043,1043,202,152,,,,
18,,,,,Micro avg.,0.19367209971236818,0.5706214689265536,0.28919112383679313,0.5239693192713327
19,,,,,Macro avg.,0.23518036126422046,0.5400825037727646,0.2734506911904088,0.5160980273383383