Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-1_Queries-2_Prompt-1_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-1_Queries-2_Prompt-1_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
8504af4
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,4,0,0.13333333333333333,1.0,0.23529411764705882,0.5666666666666667
2,2,288,288,134,113,0.4652777777777778,0.5425101214574899,0.5009345794392523,0.5364583333333334
3,3,87,87,29,18,0.3333333333333333,0.6170212765957447,0.43283582089552236,0.5632183908045977
4,4,94,94,41,24,0.43617021276595747,0.6307692307692307,0.5157232704402516,0.5904255319148937
5,5,62,62,14,16,0.22580645161290322,0.4666666666666667,0.3043478260869565,0.4838709677419355
6,6,63,63,29,0,0.4603174603174603,1.0,0.6304347826086957,0.7301587301587301
7,7,17,17,3,2,0.17647058823529413,0.6,0.2727272727272727,0.5294117647058824
8,8,65,65,17,12,0.26153846153846155,0.5862068965517241,0.36170212765957444,0.5384615384615384
9,9,31,31,7,0,0.22580645161290322,1.0,0.3684210526315789,0.6129032258064516
10,10,57,57,23,8,0.40350877192982454,0.7419354838709677,0.5227272727272727,0.631578947368421
11,11,48,48,4,4,0.08333333333333333,0.5,0.14285714285714285,0.5
12,12,36,36,6,2,0.16666666666666666,0.75,0.27272727272727276,0.5555555555555556
13,13,17,17,6,2,0.35294117647058826,0.75,0.48,0.6176470588235294
14,14,77,77,31,16,0.4025974025974026,0.6595744680851063,0.5,0.5974025974025974
15,15,40,40,11,11,0.275,0.5,0.3548387096774194,0.5
16,16,29,29,8,2,0.27586206896551724,0.8,0.4102564102564103,0.603448275862069
17,total,1043,1043,368,230,,,,
18,,,,,Micro avg.,0.35282837967401726,0.6153846153846154,0.4485070079219988,0.5661553211888782
19,,,,,Macro avg.,0.3045860876759269,0.7143931849409959,0.410146725002844,0.5827769167415413
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,2,1.0,0.5,0.6666666666666666,0.5
1,1,30,30,25,19,0.8333333333333334,0.5681818181818182,0.6756756756756758,0.6
2,2,288,288,275,282,0.9548611111111112,0.49371633752244165,0.6508875739644971,0.4878472222222222
3,3,87,87,86,84,0.9885057471264368,0.5058823529411764,0.669260700389105,0.5114942528735632
4,4,94,94,87,87,0.925531914893617,0.5,0.6492537313432836,0.5
5,5,62,62,52,58,0.8387096774193549,0.4727272727272727,0.6046511627906976,0.45161290322580644
6,6,63,63,56,40,0.8888888888888888,0.5833333333333334,0.7044025157232704,0.626984126984127
7,7,17,17,16,13,0.9411764705882353,0.5517241379310345,0.6956521739130435,0.5882352941176471
8,8,65,65,65,64,1.0,0.5038759689922481,0.6701030927835051,0.5076923076923077
9,9,31,31,30,27,0.967741935483871,0.5263157894736842,0.6818181818181819,0.5483870967741935
10,10,57,57,52,43,0.9122807017543859,0.5473684210526316,0.6842105263157894,0.5789473684210527
11,11,48,48,47,46,0.9791666666666666,0.5053763440860215,0.6666666666666667,0.5104166666666666
12,12,36,36,28,27,0.7777777777777778,0.509090909090909,0.6153846153846153,0.5138888888888888
13,13,17,17,15,14,0.8823529411764706,0.5172413793103449,0.6521739130434783,0.5294117647058824
14,14,77,77,74,65,0.961038961038961,0.5323741007194245,0.6851851851851852,0.5584415584415584
15,15,40,40,36,34,0.9,0.5142857142857142,0.6545454545454545,0.525
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,975,933,,,,
18,,,,,Micro avg.,0.9348034515819751,0.5110062893081762,0.6607929515418502,0.5201342281879194
19,,,,,Macro avg.,0.9265509486623006,0.5200156358513304,0.6647621435800163,0.53268240178378
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,14,11,0.4666666666666667,0.56,0.509090909090909,0.55
2,2,288,288,258,252,0.8958333333333334,0.5058823529411764,0.6466165413533834,0.5104166666666666
3,3,87,87,80,73,0.9195402298850575,0.5228758169934641,0.6666666666666666,0.5402298850574713
4,4,94,94,73,64,0.776595744680851,0.5328467153284672,0.6320346320346321,0.5478723404255319
5,5,62,62,26,35,0.41935483870967744,0.4262295081967213,0.42276422764227645,0.4274193548387097
6,6,63,63,52,23,0.8253968253968254,0.6933333333333334,0.7536231884057971,0.7301587301587301
7,7,17,17,11,9,0.6470588235294118,0.55,0.5945945945945946,0.5588235294117647
8,8,65,65,60,58,0.9230769230769231,0.5084745762711864,0.6557377049180328,0.5153846153846153
9,9,31,31,17,7,0.5483870967741935,0.7083333333333334,0.6181818181818182,0.6612903225806451
10,10,57,57,39,24,0.6842105263157895,0.6190476190476191,0.6500000000000001,0.631578947368421
11,11,48,48,32,30,0.6666666666666666,0.5161290322580645,0.5818181818181819,0.5208333333333334
12,12,36,36,16,8,0.4444444444444444,0.6666666666666666,0.5333333333333333,0.6111111111111112
13,13,17,17,15,8,0.8823529411764706,0.6521739130434783,0.75,0.7058823529411765
14,14,77,77,67,47,0.8701298701298701,0.5877192982456141,0.7015706806282723,0.6298701298701299
15,15,40,40,23,26,0.575,0.46938775510204084,0.5168539325842697,0.4625
16,16,29,29,26,27,0.896551724137931,0.49056603773584906,0.6341463414634146,0.4827586206896552
17,total,1043,1043,810,703,,,,
18,,,,,Micro avg.,0.7766059443911792,0.5353602115003304,0.6338028169014084,0.551294343240652
19,,,,,Macro avg.,0.7024274502896537,0.5593921152057068,0.6098254560420932,0.5638899964610564
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,11,6,0.36666666666666664,0.6470588235294118,0.46808510638297873,0.5833333333333334
2,2,288,288,218,191,0.7569444444444444,0.5330073349633252,0.6255380200860832,0.546875
3,3,87,87,61,44,0.7011494252873564,0.580952380952381,0.6354166666666667,0.5977011494252874
4,4,94,94,51,41,0.5425531914893617,0.5543478260869565,0.5483870967741936,0.5531914893617021
5,5,62,62,18,25,0.2903225806451613,0.4186046511627907,0.34285714285714286,0.4435483870967742
6,6,63,63,46,7,0.7301587301587301,0.8679245283018868,0.7931034482758621,0.8095238095238095
7,7,17,17,6,5,0.35294117647058826,0.5454545454545454,0.42857142857142855,0.5294117647058824
8,8,65,65,43,44,0.6615384615384615,0.4942528735632184,0.5657894736842105,0.49230769230769234
9,9,31,31,11,2,0.3548387096774194,0.8461538461538461,0.5,0.6451612903225806
10,10,57,57,35,15,0.6140350877192983,0.7,0.6542056074766356,0.6754385964912281
11,11,48,48,13,15,0.2708333333333333,0.4642857142857143,0.3421052631578947,0.4791666666666667
12,12,36,36,9,2,0.25,0.8181818181818182,0.38297872340425526,0.5972222222222222
13,13,17,17,13,5,0.7647058823529411,0.7222222222222222,0.7428571428571428,0.7352941176470589
14,14,77,77,57,33,0.7402597402597403,0.6333333333333333,0.6826347305389222,0.6558441558441559
15,15,40,40,13,12,0.325,0.52,0.4,0.5125
16,16,29,29,22,22,0.7586206896551724,0.5,0.6027397260273972,0.5
17,total,1043,1043,628,469,,,,
18,,,,,Micro avg.,0.6021093000958773,0.5724703737465816,0.5869158878504673,0.576222435282838
19,,,,,Macro avg.,0.5282687129234515,0.6379870528347911,0.5518786025545578,0.5945011573499055
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,4,2,0.13333333333333333,0.6666666666666666,0.2222222222222222,0.5333333333333333
2,2,288,288,96,63,0.3333333333333333,0.6037735849056604,0.4295302013422818,0.5572916666666666
3,3,87,87,9,7,0.10344827586206896,0.5625,0.17475728155339806,0.5114942528735632
4,4,94,94,36,24,0.3829787234042553,0.6,0.4675324675324675,0.5638297872340425
5,5,62,62,12,15,0.1935483870967742,0.4444444444444444,0.2696629213483146,0.47580645161290325
6,6,63,63,32,0,0.5079365079365079,1.0,0.6736842105263158,0.753968253968254
7,7,17,17,3,2,0.17647058823529413,0.6,0.2727272727272727,0.5294117647058824
8,8,65,65,9,11,0.13846153846153847,0.45,0.21176470588235297,0.4846153846153846
9,9,31,31,5,0,0.16129032258064516,1.0,0.27777777777777773,0.5806451612903226
10,10,57,57,24,9,0.42105263157894735,0.7272727272727273,0.5333333333333333,0.631578947368421
11,11,48,48,7,5,0.14583333333333334,0.5833333333333334,0.23333333333333336,0.5208333333333334
12,12,36,36,7,2,0.19444444444444445,0.7777777777777778,0.3111111111111111,0.5694444444444444
13,13,17,17,5,1,0.29411764705882354,0.8333333333333334,0.4347826086956522,0.6176470588235294
14,14,77,77,23,11,0.2987012987012987,0.6764705882352942,0.4144144144144144,0.577922077922078
15,15,40,40,7,4,0.175,0.6363636363636364,0.27450980392156865,0.5375
16,16,29,29,11,6,0.3793103448275862,0.6470588235294118,0.4782608695652174,0.5862068965517241
17,total,1043,1043,290,162,,,,
18,,,,,Micro avg.,0.27804410354745923,0.6415929203539823,0.38795986622073575,0.5613614573346117
19,,,,,Macro avg.,0.23760357118754025,0.6358232303448403,0.3340826197227667,0.5606781655731696
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,20,9,0.6666666666666666,0.6896551724137931,0.6779661016949153,0.6833333333333333
2,2,288,288,273,270,0.9479166666666666,0.5027624309392266,0.6570397111913358,0.5052083333333334
3,3,87,87,85,83,0.9770114942528736,0.5059523809523809,0.6666666666666666,0.5114942528735632
4,4,94,94,81,81,0.8617021276595744,0.5,0.6328125,0.5
5,5,62,62,28,32,0.45161290322580644,0.4666666666666667,0.45901639344262296,0.46774193548387094
6,6,63,63,53,23,0.8412698412698413,0.6973684210526315,0.7625899280575539,0.7380952380952381
7,7,17,17,14,12,0.8235294117647058,0.5384615384615384,0.6511627906976744,0.5588235294117647
8,8,65,65,64,61,0.9846153846153847,0.512,0.6736842105263159,0.5230769230769231
9,9,31,31,24,14,0.7741935483870968,0.631578947368421,0.6956521739130435,0.6612903225806451
10,10,57,57,42,32,0.7368421052631579,0.5675675675675675,0.6412213740458015,0.5877192982456141
11,11,48,48,35,35,0.7291666666666666,0.5,0.5932203389830509,0.5
12,12,36,36,16,13,0.4444444444444444,0.5517241379310345,0.4923076923076923,0.5416666666666666
13,13,17,17,16,8,0.9411764705882353,0.6666666666666666,0.7804878048780487,0.7352941176470589
14,14,77,77,72,51,0.935064935064935,0.5853658536585366,0.72,0.6363636363636364
15,15,40,40,35,34,0.875,0.5072463768115942,0.6422018348623854,0.5125
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
17,total,1043,1043,888,787,,,,
18,,,,,Micro avg.,0.8513902205177373,0.5301492537313433,0.6534216335540839,0.548418024928092
19,,,,,Macro avg.,0.7935419215609444,0.5837068329700034,0.6517272267412024,0.583094563947744
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,15,9,0.5,0.625,0.5555555555555556,0.6
2,2,288,288,267,261,0.9270833333333334,0.5056818181818182,0.6544117647058824,0.5104166666666666
3,3,87,87,83,82,0.9540229885057471,0.503030303030303,0.6587301587301587,0.5057471264367817
4,4,94,94,77,70,0.8191489361702128,0.5238095238095238,0.6390041493775933,0.5372340425531915
5,5,62,62,23,29,0.3709677419354839,0.4423076923076923,0.4035087719298246,0.45161290322580644
6,6,63,63,50,18,0.7936507936507936,0.7352941176470589,0.7633587786259542,0.753968253968254
7,7,17,17,13,8,0.7647058823529411,0.6190476190476191,0.6842105263157895,0.6470588235294118
8,8,65,65,63,58,0.9692307692307692,0.5206611570247934,0.6774193548387097,0.5384615384615384
9,9,31,31,20,9,0.6451612903225806,0.6896551724137931,0.6666666666666667,0.6774193548387096
10,10,57,57,39,27,0.6842105263157895,0.5909090909090909,0.6341463414634148,0.6052631578947368
11,11,48,48,24,22,0.5,0.5217391304347826,0.5106382978723404,0.5208333333333334
12,12,36,36,14,7,0.3888888888888889,0.6666666666666666,0.49122807017543857,0.5972222222222222
13,13,17,17,15,6,0.8823529411764706,0.7142857142857143,0.7894736842105262,0.7647058823529411
14,14,77,77,66,44,0.8571428571428571,0.6,0.7058823529411764,0.6428571428571429
15,15,40,40,31,32,0.775,0.49206349206349204,0.6019417475728155,0.4875
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,830,710,,,,
18,,,,,Micro avg.,0.7957813998082455,0.538961038961039,0.6426635694928379,0.5575263662511984
19,,,,,Macro avg.,0.7253862911191689,0.6034660839792301,0.6339565583705691,0.5945612839794755
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,12,6,0.4,0.6666666666666666,0.5,0.6
2,2,288,288,256,243,0.8888888888888888,0.5130260521042084,0.6505717916137229,0.5225694444444444
3,3,87,87,75,73,0.8620689655172413,0.5067567567567568,0.6382978723404256,0.5114942528735632
4,4,94,94,66,63,0.7021276595744681,0.5116279069767442,0.5919282511210763,0.5159574468085106
5,5,62,62,20,28,0.3225806451612903,0.4166666666666667,0.3636363636363636,0.43548387096774194
6,6,63,63,48,10,0.7619047619047619,0.8275862068965517,0.7933884297520662,0.8015873015873016
7,7,17,17,9,6,0.5294117647058824,0.6,0.5625,0.5882352941176471
8,8,65,65,55,52,0.8461538461538461,0.514018691588785,0.6395348837209301,0.5230769230769231
9,9,31,31,17,6,0.5483870967741935,0.7391304347826086,0.6296296296296297,0.6774193548387096
10,10,57,57,35,20,0.6140350877192983,0.6363636363636364,0.625,0.631578947368421
11,11,48,48,14,14,0.2916666666666667,0.5,0.3684210526315789,0.5
12,12,36,36,12,5,0.3333333333333333,0.7058823529411765,0.45283018867924524,0.5972222222222222
13,13,17,17,15,5,0.8823529411764706,0.75,0.8108108108108107,0.7941176470588235
14,14,77,77,60,37,0.7792207792207793,0.6185567010309279,0.6896551724137931,0.6493506493506493
15,15,40,40,27,28,0.675,0.4909090909090909,0.5684210526315789,0.4875
16,16,29,29,28,27,0.9655172413793104,0.509090909090909,0.6666666666666667,0.5172413793103449
17,total,1043,1043,750,623,,,,
18,,,,,Micro avg.,0.7190795781399808,0.5462490895848507,0.6208609271523179,0.5608820709491851
19,,,,,Macro avg.,0.6413323340103781,0.6180165925161605,0.6010564019008563,0.5942843961191355
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,8,4,0.26666666666666666,0.6666666666666666,0.3809523809523809,0.5666666666666667
2,2,288,288,239,207,0.8298611111111112,0.5358744394618834,0.6512261580381471,0.5555555555555556
3,3,87,87,60,51,0.6896551724137931,0.5405405405405406,0.6060606060606061,0.5517241379310345
4,4,94,94,59,50,0.6276595744680851,0.5412844036697247,0.5812807881773399,0.5478723404255319
5,5,62,62,16,23,0.25806451612903225,0.41025641025641024,0.31683168316831684,0.4435483870967742
6,6,63,63,44,4,0.6984126984126984,0.9166666666666666,0.7927927927927927,0.8174603174603174
7,7,17,17,8,5,0.47058823529411764,0.6153846153846154,0.5333333333333333,0.5882352941176471
8,8,65,65,39,43,0.6,0.47560975609756095,0.5306122448979591,0.46923076923076923
9,9,31,31,12,3,0.3870967741935484,0.8,0.5217391304347827,0.6451612903225806
10,10,57,57,31,10,0.543859649122807,0.7560975609756098,0.6326530612244897,0.6842105263157895
11,11,48,48,11,9,0.22916666666666666,0.55,0.32352941176470584,0.5208333333333334
12,12,36,36,8,3,0.2222222222222222,0.7272727272727273,0.3404255319148936,0.5694444444444444
13,13,17,17,13,4,0.7647058823529411,0.7647058823529411,0.7647058823529412,0.7647058823529411
14,14,77,77,47,32,0.6103896103896104,0.5949367088607594,0.6025641025641025,0.5974025974025974
15,15,40,40,21,22,0.525,0.4883720930232558,0.5060240963855422,0.4875
16,16,29,29,24,18,0.8275862068965517,0.5714285714285714,0.676056338028169,0.603448275862069
17,total,1043,1043,641,488,,,,
18,,,,,Micro avg.,0.6145733461169702,0.5677590788308238,0.5902394106813996,0.573346116970278
19,,,,,Macro avg.,0.5324079403729325,0.6444174730975255,0.5545561299268923,0.5978235187363561