Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-2_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-2_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
5072ceb
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,5,2,0.16666666666666666,0.7142857142857143,0.2702702702702703,0.55
2,2,288,288,73,39,0.2534722222222222,0.6517857142857143,0.365,0.5590277777777778
3,3,87,87,7,2,0.08045977011494253,0.7777777777777778,0.14583333333333331,0.5287356321839081
4,4,94,94,17,6,0.18085106382978725,0.7391304347826086,0.2905982905982906,0.5585106382978723
5,5,62,62,2,1,0.03225806451612903,0.6666666666666666,0.06153846153846154,0.5080645161290323
6,6,63,63,13,0,0.20634920634920634,1.0,0.34210526315789475,0.6031746031746031
7,7,17,17,3,1,0.17647058823529413,0.75,0.2857142857142857,0.5588235294117647
8,8,65,65,2,3,0.03076923076923077,0.4,0.05714285714285715,0.49230769230769234
9,9,31,31,0,0,0.0,0.0,0.0,0.5
10,10,57,57,17,5,0.2982456140350877,0.7727272727272727,0.430379746835443,0.6052631578947368
11,11,48,48,0,0,0.0,0.0,0.0,0.5
12,12,36,36,6,2,0.16666666666666666,0.75,0.27272727272727276,0.5555555555555556
13,13,17,17,8,0,0.47058823529411764,1.0,0.6399999999999999,0.7352941176470589
14,14,77,77,8,5,0.1038961038961039,0.6153846153846154,0.1777777777777778,0.5194805194805194
15,15,40,40,29,31,0.725,0.48333333333333334,0.58,0.475
16,16,29,29,5,4,0.1724137931034483,0.5555555555555556,0.26315789473684215,0.5172413793103449
17,total,1043,1043,195,101,,,,
18,,,,,Micro avg.,0.186960690316395,0.6587837837837838,0.29126213592233013,0.5450623202301055
19,,,,,Macro avg.,0.1802416015117002,0.5809792402823093,0.2460144384607488,0.545087007010051
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,2,1.0,0.5,0.6666666666666666,0.5
1,1,30,30,29,26,0.9666666666666667,0.5272727272727272,0.6823529411764705,0.55
2,2,288,288,287,284,0.9965277777777778,0.5026269702276708,0.6682188591385332,0.5052083333333334
3,3,87,87,84,71,0.9655172413793104,0.5419354838709678,0.6942148760330579,0.5747126436781609
4,4,94,94,94,91,1.0,0.5081081081081081,0.6738351254480287,0.5159574468085106
5,5,62,62,44,56,0.7096774193548387,0.44,0.5432098765432098,0.4032258064516129
6,6,63,63,60,53,0.9523809523809523,0.5309734513274337,0.6818181818181819,0.5555555555555556
7,7,17,17,17,16,1.0,0.5151515151515151,0.6799999999999999,0.5294117647058824
8,8,65,65,61,61,0.9384615384615385,0.5,0.6524064171122995,0.5
9,9,31,31,31,31,1.0,0.5,0.6666666666666666,0.5
10,10,57,57,55,50,0.9649122807017544,0.5238095238095238,0.6790123456790124,0.543859649122807
11,11,48,48,43,43,0.8958333333333334,0.5,0.6417910447761194,0.5
12,12,36,36,33,35,0.9166666666666666,0.4852941176470588,0.6346153846153846,0.4722222222222222
13,13,17,17,17,15,1.0,0.53125,0.6938775510204082,0.5588235294117647
14,14,77,77,75,73,0.974025974025974,0.5067567567567568,0.6666666666666667,0.512987012987013
15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,1001,976,,,,
18,,,,,Micro avg.,0.959731543624161,0.5063227111785533,0.6629139072847682,0.5119846596356663
19,,,,,Macro avg.,0.9576864618087538,0.5066575678924565,0.6622756433349434,0.513056703780992
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,1,1.0,0.6666666666666666,0.8,0.75
1,1,30,30,27,20,0.9,0.574468085106383,0.7012987012987012,0.6166666666666667
2,2,288,288,270,259,0.9375,0.5103969754253308,0.6609547123623011,0.5190972222222222
3,3,87,87,75,47,0.8620689655172413,0.6147540983606558,0.7177033492822965,0.6609195402298851
4,4,94,94,77,70,0.8191489361702128,0.5238095238095238,0.6390041493775933,0.5372340425531915
5,5,62,62,34,48,0.5483870967741935,0.4146341463414634,0.4722222222222222,0.3870967741935484
6,6,63,63,58,39,0.9206349206349206,0.5979381443298969,0.725,0.6507936507936508
7,7,17,17,15,13,0.8823529411764706,0.5357142857142857,0.6666666666666667,0.5588235294117647
8,8,65,65,45,47,0.6923076923076923,0.4891304347826087,0.5732484076433121,0.4846153846153846
9,9,31,31,26,28,0.8387096774193549,0.48148148148148145,0.6117647058823529,0.46774193548387094
10,10,57,57,46,38,0.8070175438596491,0.5476190476190477,0.652482269503546,0.5701754385964912
11,11,48,48,35,34,0.7291666666666666,0.5072463768115942,0.5982905982905983,0.5104166666666666
12,12,36,36,26,31,0.7222222222222222,0.45614035087719296,0.5591397849462365,0.4305555555555556
13,13,17,17,15,10,0.8823529411764706,0.6,0.7142857142857143,0.6470588235294118
14,14,77,77,71,57,0.922077922077922,0.5546875,0.6926829268292682,0.5909090909090909
15,15,40,40,39,40,0.975,0.4936708860759494,0.6554621848739496,0.4875
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,890,811,,,,
18,,,,,Micro avg.,0.8533077660594439,0.5232216343327455,0.6486880466472305,0.5378715244487057
19,,,,,Macro avg.,0.8493498544707656,0.5334328237295343,0.6533454741253779,0.5511531953780825
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,23,11,0.7666666666666667,0.6764705882352942,0.71875,0.7
2,2,288,288,213,176,0.7395833333333334,0.5475578406169666,0.6292466765140324,0.5642361111111112
3,3,87,87,49,19,0.5632183908045977,0.7205882352941176,0.632258064516129,0.6724137931034483
4,4,94,94,46,32,0.48936170212765956,0.5897435897435898,0.5348837209302325,0.574468085106383
5,5,62,62,23,21,0.3709677419354839,0.5227272727272727,0.4339622641509434,0.5161290322580645
6,6,63,63,54,18,0.8571428571428571,0.75,0.7999999999999999,0.7857142857142857
7,7,17,17,11,9,0.6470588235294118,0.55,0.5945945945945946,0.5588235294117647
8,8,65,65,26,14,0.4,0.65,0.49523809523809526,0.5923076923076923
9,9,31,31,11,5,0.3548387096774194,0.6875,0.4680851063829787,0.5967741935483871
10,10,57,57,33,18,0.5789473684210527,0.6470588235294118,0.6111111111111113,0.631578947368421
11,11,48,48,22,11,0.4583333333333333,0.6666666666666666,0.5432098765432098,0.6145833333333334
12,12,36,36,15,17,0.4166666666666667,0.46875,0.4411764705882353,0.4722222222222222
13,13,17,17,14,6,0.8235294117647058,0.7,0.7567567567567567,0.7352941176470589
14,14,77,77,67,42,0.8701298701298701,0.6146788990825688,0.7204301075268817,0.6623376623376623
15,15,40,40,30,35,0.75,0.46153846153846156,0.5714285714285714,0.4375
16,16,29,29,28,29,0.9655172413793104,0.49122807017543857,0.6511627906976745,0.4827586206896552
17,total,1043,1043,666,463,,,,
18,,,,,Micro avg.,0.638542665388303,0.5899025686448184,0.6132596685082873,0.5973154362416108
19,,,,,Macro avg.,0.6207036539360216,0.6320299086829286,0.6040565219791831,0.6086553897740875
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,6,1,0.2,0.8571428571428571,0.32432432432432434,0.5833333333333334
2,2,288,288,37,18,0.1284722222222222,0.6727272727272727,0.21574344023323613,0.5329861111111112
3,3,87,87,13,1,0.14942528735632185,0.9285714285714286,0.2574257425742575,0.5689655172413793
4,4,94,94,13,8,0.13829787234042554,0.6190476190476191,0.22608695652173913,0.526595744680851
5,5,62,62,2,1,0.03225806451612903,0.6666666666666666,0.06153846153846154,0.5080645161290323
6,6,63,63,18,2,0.2857142857142857,0.9,0.4337349397590361,0.626984126984127
7,7,17,17,3,0,0.17647058823529413,1.0,0.3,0.5882352941176471
8,8,65,65,7,5,0.1076923076923077,0.5833333333333334,0.18181818181818185,0.5153846153846153
9,9,31,31,6,2,0.1935483870967742,0.75,0.30769230769230765,0.5645161290322581
10,10,57,57,22,7,0.38596491228070173,0.7586206896551724,0.5116279069767441,0.631578947368421
11,11,48,48,3,0,0.0625,1.0,0.11764705882352941,0.53125
12,12,36,36,6,3,0.16666666666666666,0.6666666666666666,0.26666666666666666,0.5416666666666666
13,13,17,17,5,0,0.29411764705882354,1.0,0.45454545454545453,0.6470588235294118
14,14,77,77,6,5,0.07792207792207792,0.5454545454545454,0.13636363636363635,0.5064935064935064
15,15,40,40,13,14,0.325,0.48148148148148145,0.3880597014925373,0.4875
16,16,29,29,16,16,0.5517241379310345,0.5,0.5245901639344263,0.5
17,total,1043,1043,176,83,,,,
18,,,,,Micro avg.,0.16874400767018216,0.6795366795366795,0.27035330261136714,0.5445829338446788
19,,,,,Macro avg.,0.19269261511959201,0.7017477976910025,0.2769332319567375,0.5506243136513154
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,1,1.0,0.6666666666666666,0.8,0.75
1,1,30,30,26,22,0.8666666666666667,0.5416666666666666,0.6666666666666667,0.5666666666666667
2,2,288,288,278,264,0.9652777777777778,0.5129151291512916,0.6698795180722892,0.5243055555555556
3,3,87,87,73,46,0.8390804597701149,0.6134453781512605,0.70873786407767,0.6551724137931034
4,4,94,94,77,84,0.8191489361702128,0.4782608695652174,0.603921568627451,0.4627659574468085
5,5,62,62,31,40,0.5,0.43661971830985913,0.46616541353383456,0.4274193548387097
6,6,63,63,57,40,0.9047619047619048,0.5876288659793815,0.7125,0.6349206349206349
7,7,17,17,16,14,0.9411764705882353,0.5333333333333333,0.6808510638297872,0.5588235294117647
8,8,65,65,48,44,0.7384615384615385,0.5217391304347826,0.6114649681528661,0.5307692307692308
9,9,31,31,30,27,0.967741935483871,0.5263157894736842,0.6818181818181819,0.5483870967741935
10,10,57,57,42,40,0.7368421052631579,0.5121951219512195,0.6043165467625898,0.5175438596491229
11,11,48,48,25,23,0.5208333333333334,0.5208333333333334,0.5208333333333334,0.5208333333333334
12,12,36,36,27,27,0.75,0.5,0.6,0.5
13,13,17,17,17,10,1.0,0.6296296296296297,0.7727272727272727,0.7058823529411765
14,14,77,77,71,56,0.922077922077922,0.5590551181102362,0.696078431372549,0.5974025974025974
15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,889,807,,,,
18,,,,,Micro avg.,0.8523489932885906,0.5241745283018868,0.6491420226359985,0.5393096836049857
19,,,,,Macro avg.,0.8512981794326314,0.5376649853386213,0.6546643624886955,0.5588760343236999
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,1,1.0,0.6666666666666666,0.8,0.75
1,1,30,30,25,21,0.8333333333333334,0.5434782608695652,0.6578947368421053,0.5666666666666667
2,2,288,288,265,252,0.9201388888888888,0.5125725338491296,0.6583850931677019,0.5225694444444444
3,3,87,87,71,40,0.8160919540229885,0.6396396396396397,0.7171717171717172,0.6781609195402298
4,4,94,94,72,74,0.7659574468085106,0.4931506849315068,0.6,0.48936170212765956
5,5,62,62,24,30,0.3870967741935484,0.4444444444444444,0.41379310344827586,0.45161290322580644
6,6,63,63,57,31,0.9047619047619048,0.6477272727272727,0.7549668874172185,0.7063492063492064
7,7,17,17,15,13,0.8823529411764706,0.5357142857142857,0.6666666666666667,0.5588235294117647
8,8,65,65,40,39,0.6153846153846154,0.5063291139240507,0.5555555555555556,0.5076923076923077
9,9,31,31,29,23,0.9354838709677419,0.5576923076923077,0.6987951807228916,0.5967741935483871
10,10,57,57,41,32,0.7192982456140351,0.5616438356164384,0.6307692307692307,0.5789473684210527
11,11,48,48,21,20,0.4375,0.5121951219512195,0.47191011235955055,0.5104166666666666
12,12,36,36,19,21,0.5277777777777778,0.475,0.5,0.4722222222222222
13,13,17,17,16,9,0.9411764705882353,0.64,0.7619047619047621,0.7058823529411765
14,14,77,77,69,50,0.8961038961038961,0.5798319327731093,0.7040816326530612,0.6233766233766234
15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,835,725,,,,
18,,,,,Micro avg.,0.800575263662512,0.5352564102564102,0.6415674222051478,0.552732502396932
19,,,,,Macro avg.,0.798968124683644,0.5480050647529198,0.6426604712948276,0.5716974180373067
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,21,16,0.7,0.5675675675675675,0.626865671641791,0.5833333333333334
2,2,288,288,251,222,0.8715277777777778,0.5306553911205074,0.6596583442838371,0.5503472222222222
3,3,87,87,64,30,0.735632183908046,0.6808510638297872,0.7071823204419889,0.6954022988505747
4,4,94,94,62,54,0.6595744680851063,0.5344827586206896,0.5904761904761905,0.5425531914893617
5,5,62,62,21,20,0.3387096774193548,0.5121951219512195,0.4077669902912621,0.5080645161290323
6,6,63,63,53,18,0.8412698412698413,0.7464788732394366,0.791044776119403,0.7777777777777778
7,7,17,17,12,12,0.7058823529411765,0.5,0.5853658536585366,0.5
8,8,65,65,32,23,0.49230769230769234,0.5818181818181818,0.5333333333333333,0.5692307692307692
9,9,31,31,24,12,0.7741935483870968,0.6666666666666666,0.7164179104477612,0.6935483870967742
10,10,57,57,35,18,0.6140350877192983,0.660377358490566,0.6363636363636364,0.6491228070175439
11,11,48,48,15,14,0.3125,0.5172413793103449,0.38961038961038963,0.5104166666666666
12,12,36,36,14,13,0.3888888888888889,0.5185185185185185,0.4444444444444444,0.5138888888888888
13,13,17,17,16,7,0.9411764705882353,0.6956521739130435,0.7999999999999999,0.7647058823529411
14,14,77,77,66,44,0.8571428571428571,0.6,0.7058823529411764,0.6428571428571429
15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,756,572,,,,
18,,,,,Micro avg.,0.7248322147651006,0.5692771084337349,0.6377056094474906,0.5882070949185043
19,,,,,Macro avg.,0.6901671086138454,0.6066179444145017,0.6232007184737499,0.6030146402301781
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,15,9,0.5,0.625,0.5555555555555556,0.6
2,2,288,288,204,152,0.7083333333333334,0.5730337078651685,0.6335403726708074,0.5902777777777778
3,3,87,87,40,14,0.45977011494252873,0.7407407407407407,0.5673758865248226,0.6494252873563219
4,4,94,94,46,22,0.48936170212765956,0.6764705882352942,0.5679012345679013,0.6276595744680851
5,5,62,62,9,4,0.14516129032258066,0.6923076923076923,0.24000000000000002,0.5403225806451613
6,6,63,63,42,0,0.6666666666666666,1.0,0.8,0.8333333333333334
7,7,17,17,10,8,0.5882352941176471,0.5555555555555556,0.5714285714285715,0.5588235294117647
8,8,65,65,14,9,0.2153846153846154,0.6086956521739131,0.31818181818181823,0.5384615384615384
9,9,31,31,8,2,0.25806451612903225,0.8,0.3902439024390244,0.5967741935483871
10,10,57,57,26,12,0.45614035087719296,0.6842105263157895,0.5473684210526316,0.6228070175438597
11,11,48,48,3,2,0.0625,0.6,0.11320754716981132,0.5104166666666666
12,12,36,36,8,6,0.2222222222222222,0.5714285714285714,0.32,0.5277777777777778
13,13,17,17,13,2,0.7647058823529411,0.8666666666666667,0.8125,0.8235294117647058
14,14,77,77,44,24,0.5714285714285714,0.6470588235294118,0.6068965517241379,0.6298701298701299
15,15,40,40,39,40,0.975,0.4936708860759494,0.6554621848739496,0.4875
16,16,29,29,26,26,0.896551724137931,0.5,0.6419753086419753,0.5
17,total,1043,1043,548,332,,,,
18,,,,,Micro avg.,0.5254074784276127,0.6227272727272727,0.5699427977119085,0.6035474592521572
19,,,,,Macro avg.,0.4987956637672306,0.6844023182879267,0.5299002365586867,0.6109987540367947