Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-0_Prompt-2_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-0_Prompt-2_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
7db66f3
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,71,54,0.2465277777777778,0.568,0.3438256658595642,0.5295138888888888
3,3,87,87,19,5,0.21839080459770116,0.7916666666666666,0.34234234234234234,0.5804597701149425
4,4,94,94,45,13,0.4787234042553192,0.7758620689655172,0.5921052631578948,0.6702127659574468
5,5,62,62,6,3,0.0967741935483871,0.6666666666666666,0.16901408450704225,0.5241935483870968
6,6,63,63,22,1,0.3492063492063492,0.9565217391304348,0.5116279069767442,0.6666666666666666
7,7,17,17,5,1,0.29411764705882354,0.8333333333333334,0.4347826086956522,0.6176470588235294
8,8,65,65,5,7,0.07692307692307693,0.4166666666666667,0.12987012987012989,0.4846153846153846
9,9,31,31,4,1,0.12903225806451613,0.8,0.2222222222222222,0.5483870967741935
10,10,57,57,30,17,0.5263157894736842,0.6382978723404256,0.5769230769230769,0.6140350877192983
11,11,48,48,25,13,0.5208333333333334,0.6578947368421053,0.5813953488372092,0.625
12,12,36,36,14,9,0.3888888888888889,0.6086956521739131,0.47457627118644075,0.5694444444444444
13,13,17,17,9,2,0.5294117647058824,0.8181818181818182,0.6428571428571428,0.7058823529411765
14,14,77,77,43,25,0.5584415584415584,0.6323529411764706,0.5931034482758619,0.6168831168831169
15,15,40,40,0,0,0.0,0.0,0.0,0.5
16,16,29,29,4,10,0.13793103448275862,0.2857142857142857,0.18604651162790697,0.39655172413793105
17,total,1043,1043,302,161,,,,
18,,,,,Micro avg.,0.28954937679769893,0.652267818574514,0.40106241699867196,0.5675934803451582
19,,,,,Macro avg.,0.2677363459269445,0.5558737910504884,0.34121717784348415,0.5676172297855361
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,14,5,0.4666666666666667,0.7368421052631579,0.5714285714285714,0.65
2,2,288,288,155,143,0.5381944444444444,0.5201342281879194,0.5290102389078497,0.5208333333333334
3,3,87,87,48,30,0.5517241379310345,0.6153846153846154,0.5818181818181819,0.603448275862069
4,4,94,94,62,35,0.6595744680851063,0.6391752577319587,0.6492146596858638,0.6436170212765957
5,5,62,62,26,24,0.41935483870967744,0.52,0.46428571428571425,0.5161290322580645
6,6,63,63,47,12,0.746031746031746,0.7966101694915254,0.7704918032786885,0.7777777777777778
7,7,17,17,12,8,0.7058823529411765,0.6,0.6486486486486486,0.6176470588235294
8,8,65,65,36,24,0.5538461538461539,0.6,0.5760000000000001,0.5923076923076923
9,9,31,31,21,20,0.6774193548387096,0.5121951219512195,0.5833333333333334,0.5161290322580645
10,10,57,57,41,25,0.7192982456140351,0.6212121212121212,0.6666666666666667,0.6403508771929824
11,11,48,48,45,45,0.9375,0.5,0.6521739130434783,0.5
12,12,36,36,28,32,0.7777777777777778,0.4666666666666667,0.5833333333333334,0.4444444444444444
13,13,17,17,14,10,0.8235294117647058,0.5833333333333334,0.6829268292682927,0.6176470588235294
14,14,77,77,65,59,0.8441558441558441,0.5241935483870968,0.6467661691542288,0.538961038961039
15,15,40,40,7,12,0.175,0.3684210526315789,0.23728813559322032,0.4375
16,16,29,29,22,24,0.7586206896551724,0.4782608695652174,0.5866666666666667,0.46551724137931033
17,total,1043,1043,643,508,,,,
18,,,,,Micro avg.,0.6164908916586769,0.5586446568201564,0.5861440291704649,0.5647171620325983
19,,,,,Macro avg.,0.6090927136742501,0.5342605346944946,0.5547089920654551,0.5636652873352019
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,9,2,0.3,0.8181818181818182,0.43902439024390244,0.6166666666666667
2,2,288,288,115,100,0.3993055555555556,0.5348837209302325,0.4572564612326043,0.5260416666666666
3,3,87,87,34,16,0.39080459770114945,0.68,0.4963503649635037,0.603448275862069
4,4,94,94,53,23,0.5638297872340425,0.6973684210526315,0.6235294117647059,0.6595744680851063
5,5,62,62,18,15,0.2903225806451613,0.5454545454545454,0.37894736842105264,0.5241935483870968
6,6,63,63,42,9,0.6666666666666666,0.8235294117647058,0.7368421052631577,0.7619047619047619
7,7,17,17,9,2,0.5294117647058824,0.8181818181818182,0.6428571428571428,0.7058823529411765
8,8,65,65,28,15,0.4307692307692308,0.6511627906976745,0.5185185185185186,0.6
9,9,31,31,15,13,0.4838709677419355,0.5357142857142857,0.5084745762711865,0.532258064516129
10,10,57,57,37,22,0.6491228070175439,0.6271186440677966,0.6379310344827587,0.631578947368421
11,11,48,48,38,38,0.7916666666666666,0.5,0.6129032258064516,0.5
12,12,36,36,23,25,0.6388888888888888,0.4791666666666667,0.5476190476190476,0.4722222222222222
13,13,17,17,9,7,0.5294117647058824,0.5625,0.5454545454545455,0.5588235294117647
14,14,77,77,58,47,0.7532467532467533,0.5523809523809524,0.6373626373626373,0.5714285714285714
15,15,40,40,4,7,0.1,0.36363636363636365,0.15686274509803924,0.4625
16,16,29,29,20,21,0.6896551724137931,0.4878048780487805,0.5714285714285714,0.4827586206896552
17,total,1043,1043,512,362,,,,
18,,,,,Micro avg.,0.49089165867689355,0.585812356979405,0.5341679707876891,0.5719079578139981
19,,,,,Macro avg.,0.48276312964465595,0.5692402539281336,0.5006683615757546,0.5711342174206063
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,5,0,0.16666666666666666,1.0,0.2857142857142857,0.5833333333333334
2,2,288,288,95,75,0.3298611111111111,0.5588235294117647,0.4148471615720524,0.5347222222222222
3,3,87,87,19,10,0.21839080459770116,0.6551724137931034,0.3275862068965517,0.5517241379310345
4,4,94,94,46,16,0.48936170212765956,0.7419354838709677,0.5897435897435898,0.6595744680851063
5,5,62,62,7,9,0.11290322580645161,0.4375,0.1794871794871795,0.4838709677419355
6,6,63,63,36,5,0.5714285714285714,0.8780487804878049,0.6923076923076923,0.746031746031746
7,7,17,17,9,1,0.5294117647058824,0.9,0.6666666666666667,0.7352941176470589
8,8,65,65,20,12,0.3076923076923077,0.625,0.4123711340206186,0.5615384615384615
9,9,31,31,8,10,0.25806451612903225,0.4444444444444444,0.32653061224489793,0.46774193548387094
10,10,57,57,37,19,0.6491228070175439,0.6607142857142857,0.6548672566371682,0.6578947368421053
11,11,48,48,36,26,0.75,0.5806451612903226,0.6545454545454547,0.6041666666666666
12,12,36,36,23,19,0.6388888888888888,0.5476190476190477,0.5897435897435898,0.5555555555555556
13,13,17,17,7,5,0.4117647058823529,0.5833333333333334,0.4827586206896552,0.5588235294117647
14,14,77,77,50,38,0.6493506493506493,0.5681818181818182,0.6060606060606061,0.577922077922078
15,15,40,40,2,5,0.05,0.2857142857142857,0.0851063829787234,0.4625
16,16,29,29,20,19,0.6896551724137931,0.5128205128205128,0.5882352941176471,0.5172413793103449
17,total,1043,1043,420,269,,,,
18,,,,,Micro avg.,0.40268456375838924,0.6095791001451378,0.4849884526558891,0.5723873441994247
19,,,,,Macro avg.,0.4013272290481536,0.5870560645106879,0.4445042196133163,0.5739961962190168
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,2,0,0.06666666666666667,1.0,0.125,0.5333333333333333
2,2,288,288,66,43,0.22916666666666666,0.6055045871559633,0.3324937027707809,0.5399305555555556
3,3,87,87,11,4,0.12643678160919541,0.7333333333333333,0.21568627450980393,0.5402298850574713
4,4,94,94,43,11,0.4574468085106383,0.7962962962962963,0.5810810810810811,0.6702127659574468
5,5,62,62,4,1,0.06451612903225806,0.8,0.11940298507462686,0.5241935483870968
6,6,63,63,24,3,0.38095238095238093,0.8888888888888888,0.5333333333333333,0.6666666666666666
7,7,17,17,7,0,0.4117647058823529,1.0,0.5833333333333334,0.7058823529411765
8,8,65,65,10,7,0.15384615384615385,0.5882352941176471,0.24390243902439027,0.5230769230769231
9,9,31,31,4,2,0.12903225806451613,0.6666666666666666,0.2162162162162162,0.532258064516129
10,10,57,57,36,18,0.631578947368421,0.6666666666666666,0.6486486486486486,0.6578947368421053
11,11,48,48,33,18,0.6875,0.6470588235294118,0.6666666666666667,0.65625
12,12,36,36,18,13,0.5,0.5806451612903226,0.537313432835821,0.5694444444444444
13,13,17,17,6,3,0.35294117647058826,0.6666666666666666,0.46153846153846156,0.5882352941176471
14,14,77,77,38,23,0.4935064935064935,0.6229508196721312,0.5507246376811594,0.5974025974025974
15,15,40,40,2,1,0.05,0.6666666666666666,0.09302325581395349,0.5125
16,16,29,29,17,12,0.5862068965517241,0.5862068965517241,0.5862068965517241,0.5862068965517241
17,total,1043,1043,321,159,,,,
18,,,,,Micro avg.,0.3077660594439118,0.66875,0.42153644123440576,0.5776605944391179
19,,,,,Macro avg.,0.31303306265459147,0.6773992216177872,0.3820336097105883,0.582571650873548
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,2,0.5,0.3333333333333333,0.4,0.25
1,1,30,30,20,12,0.6666666666666666,0.625,0.6451612903225806,0.6333333333333333
2,2,288,288,240,224,0.8333333333333334,0.5172413793103449,0.6382978723404256,0.5277777777777778
3,3,87,87,62,50,0.7126436781609196,0.5535714285714286,0.6231155778894473,0.5689655172413793
4,4,94,94,77,67,0.8191489361702128,0.5347222222222222,0.6470588235294118,0.5531914893617021
5,5,62,62,25,28,0.4032258064516129,0.4716981132075472,0.43478260869565216,0.47580645161290325
6,6,63,63,54,21,0.8571428571428571,0.72,0.782608695652174,0.7619047619047619
7,7,17,17,13,12,0.7647058823529411,0.52,0.6190476190476191,0.5294117647058824
8,8,65,65,47,31,0.7230769230769231,0.6025641025641025,0.6573426573426574,0.6230769230769231
9,9,31,31,29,29,0.9354838709677419,0.5,0.6516853932584269,0.5
10,10,57,57,46,31,0.8070175438596491,0.5974025974025974,0.6865671641791045,0.631578947368421
11,11,48,48,48,48,1.0,0.5,0.6666666666666666,0.5
12,12,36,36,32,33,0.8888888888888888,0.49230769230769234,0.6336633663366337,0.4861111111111111
13,13,17,17,17,11,1.0,0.6071428571428571,0.7555555555555554,0.6764705882352942
14,14,77,77,72,68,0.935064935064935,0.5142857142857142,0.663594470046083,0.525974025974026
15,15,40,40,13,14,0.325,0.48148148148148145,0.3880597014925373,0.4875
16,16,29,29,28,28,0.9655172413793104,0.5,0.6588235294117647,0.5
17,total,1043,1043,824,709,,,,
18,,,,,Micro avg.,0.7900287631831256,0.5375081539465101,0.639751552795031,0.5551294343240653
19,,,,,Macro avg.,0.7727597978538818,0.5335735836370188,0.6207077053980435,0.543006040688442
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,17,7,0.5666666666666667,0.7083333333333334,0.6296296296296297,0.6666666666666666
2,2,288,288,211,193,0.7326388888888888,0.5222772277227723,0.6098265895953757,0.53125
3,3,87,87,52,39,0.5977011494252874,0.5714285714285714,0.5842696629213484,0.5747126436781609
4,4,94,94,71,52,0.7553191489361702,0.5772357723577236,0.6543778801843319,0.601063829787234
5,5,62,62,19,21,0.3064516129032258,0.475,0.37254901960784315,0.4838709677419355
6,6,63,63,51,14,0.8095238095238095,0.7846153846153846,0.7968749999999999,0.7936507936507936
7,7,17,17,12,8,0.7058823529411765,0.6,0.6486486486486486,0.6176470588235294
8,8,65,65,42,25,0.6461538461538462,0.6268656716417911,0.6363636363636365,0.6307692307692307
9,9,31,31,27,25,0.8709677419354839,0.5192307692307693,0.6506024096385542,0.532258064516129
10,10,57,57,44,25,0.7719298245614035,0.6376811594202898,0.6984126984126984,0.6666666666666666
11,11,48,48,47,45,0.9791666666666666,0.5108695652173914,0.6714285714285714,0.5208333333333334
12,12,36,36,30,33,0.8333333333333334,0.47619047619047616,0.6060606060606061,0.4583333333333333
13,13,17,17,15,9,0.8823529411764706,0.625,0.7317073170731708,0.6764705882352942
14,14,77,77,69,64,0.8961038961038961,0.518796992481203,0.6571428571428571,0.5324675324675324
15,15,40,40,9,9,0.225,0.5,0.3103448275862069,0.5
16,16,29,29,27,27,0.9310344827586207,0.5,0.6506024096385543,0.5
17,total,1043,1043,744,596,,,,
18,,,,,Micro avg.,0.713326941514861,0.5552238805970149,0.624422996223248,0.5709491850431447
19,,,,,Macro avg.,0.7064839036455851,0.5972661719788063,0.6220887312116882,0.5903918064511668
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,12,5,0.4,0.7058823529411765,0.5106382978723405,0.6166666666666667
2,2,288,288,172,150,0.5972222222222222,0.5341614906832298,0.5639344262295082,0.5381944444444444
3,3,87,87,47,25,0.5402298850574713,0.6527777777777778,0.5911949685534591,0.6264367816091954
4,4,94,94,65,37,0.6914893617021277,0.6372549019607843,0.663265306122449,0.648936170212766
5,5,62,62,15,14,0.24193548387096775,0.5172413793103449,0.3296703296703297,0.5080645161290323
6,6,63,63,44,7,0.6984126984126984,0.8627450980392157,0.7719298245614035,0.7936507936507936
7,7,17,17,11,6,0.6470588235294118,0.6470588235294118,0.6470588235294118,0.6470588235294118
8,8,65,65,32,20,0.49230769230769234,0.6153846153846154,0.5470085470085471,0.5923076923076923
9,9,31,31,24,19,0.7741935483870968,0.5581395348837209,0.6486486486486487,0.5806451612903226
10,10,57,57,40,23,0.7017543859649122,0.6349206349206349,0.6666666666666666,0.6491228070175439
11,11,48,48,42,37,0.875,0.5316455696202531,0.6614173228346456,0.5520833333333334
12,12,36,36,26,31,0.7222222222222222,0.45614035087719296,0.5591397849462365,0.4305555555555556
13,13,17,17,14,8,0.8235294117647058,0.6363636363636364,0.717948717948718,0.6764705882352942
14,14,77,77,68,57,0.8831168831168831,0.544,0.6732673267326733,0.5714285714285714
15,15,40,40,3,6,0.075,0.3333333333333333,0.12244897959183673,0.4625
16,16,29,29,26,23,0.896551724137931,0.5306122448979592,0.6666666666666666,0.5517241379310345
17,total,1043,1043,642,468,,,,
18,,,,,Micro avg.,0.6155321188878236,0.5783783783783784,0.5963771481653507,0.5834132310642378
19,,,,,Macro avg.,0.6211779025115496,0.6116271614425465,0.5886806649558946,0.5997556496083327
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,4,2,0.13333333333333333,0.6666666666666666,0.2222222222222222,0.5333333333333333
2,2,288,288,134,109,0.4652777777777778,0.551440329218107,0.504708097928437,0.5434027777777778
3,3,87,87,34,18,0.39080459770114945,0.6538461538461539,0.4892086330935252,0.5919540229885057
4,4,94,94,57,25,0.6063829787234043,0.6951219512195121,0.6477272727272727,0.6702127659574468
5,5,62,62,7,10,0.11290322580645161,0.4117647058823529,0.17721518987341772,0.47580645161290325
6,6,63,63,40,4,0.6349206349206349,0.9090909090909091,0.7476635514018691,0.7857142857142857
7,7,17,17,9,4,0.5294117647058824,0.6923076923076923,0.5999999999999999,0.6470588235294118
8,8,65,65,20,11,0.3076923076923077,0.6451612903225806,0.4166666666666667,0.5692307692307692
9,9,31,31,17,12,0.5483870967741935,0.5862068965517241,0.5666666666666665,0.5806451612903226
10,10,57,57,37,19,0.6491228070175439,0.6607142857142857,0.6548672566371682,0.6578947368421053
11,11,48,48,37,31,0.7708333333333334,0.5441176470588235,0.6379310344827587,0.5625
12,12,36,36,24,24,0.6666666666666666,0.5,0.5714285714285715,0.5
13,13,17,17,13,7,0.7647058823529411,0.65,0.7027027027027027,0.6764705882352942
14,14,77,77,61,37,0.7922077922077922,0.6224489795918368,0.6971428571428572,0.6558441558441559
15,15,40,40,3,6,0.075,0.3333333333333333,0.12244897959183673,0.4625
16,16,29,29,17,20,0.5862068965517241,0.4594594594594595,0.5151515151515151,0.4482758620689655
17,total,1043,1043,514,339,,,,
18,,,,,Micro avg.,0.4928092042186002,0.6025791324736225,0.5421940928270041,0.5838926174496645
19,,,,,Macro avg.,0.4725798291508903,0.563628252956673,0.48669124810102865,0.580049631436781