Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-1_Queries-2_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-1_Queries-2_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
ade241e
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,9,4,0.3,0.6923076923076923,0.41860465116279066,0.5833333333333334
2,2,288,288,109,85,0.3784722222222222,0.5618556701030928,0.45228215767634855,0.5416666666666666
3,3,87,87,29,9,0.3333333333333333,0.7631578947368421,0.46399999999999997,0.6149425287356322
4,4,94,94,45,33,0.4787234042553192,0.5769230769230769,0.5232558139534883,0.5638297872340425
5,5,62,62,8,9,0.12903225806451613,0.47058823529411764,0.20253164556962025,0.49193548387096775
6,6,63,63,27,0,0.42857142857142855,1.0,0.6,0.7142857142857143
7,7,17,17,6,3,0.35294117647058826,0.6666666666666666,0.46153846153846156,0.5882352941176471
8,8,65,65,21,11,0.3230769230769231,0.65625,0.43298969072164956,0.5769230769230769
9,9,31,31,8,0,0.25806451612903225,1.0,0.41025641025641024,0.6290322580645161
10,10,57,57,22,12,0.38596491228070173,0.6470588235294118,0.48351648351648346,0.5877192982456141
11,11,48,48,11,2,0.22916666666666666,0.8461538461538461,0.360655737704918,0.59375
12,12,36,36,10,4,0.2777777777777778,0.7142857142857143,0.4,0.5833333333333334
13,13,17,17,10,2,0.5882352941176471,0.8333333333333334,0.6896551724137931,0.7352941176470589
14,14,77,77,29,11,0.37662337662337664,0.725,0.49572649572649574,0.6168831168831169
15,15,40,40,12,10,0.3,0.5454545454545454,0.3870967741935483,0.525
16,16,29,29,6,7,0.20689655172413793,0.46153846153846156,0.28571428571428575,0.4827586206896552
17,total,1043,1043,363,202,,,,
18,,,,,Micro avg.,0.3480345158197507,0.6424778761061947,0.4514925373134328,0.5771812080536913
19,,,,,Macro avg.,0.3439341083125688,0.7153278800192235,0.45497002628323296,0.598760154707669
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,1,1.0,0.6666666666666666,0.8,0.75
1,1,30,30,26,19,0.8666666666666667,0.5777777777777777,0.6933333333333332,0.6166666666666667
2,2,288,288,282,276,0.9791666666666666,0.5053763440860215,0.6666666666666667,0.5104166666666666
3,3,87,87,82,73,0.9425287356321839,0.5290322580645161,0.6776859504132231,0.5517241379310345
4,4,94,94,92,90,0.9787234042553191,0.5054945054945055,0.6666666666666667,0.5106382978723404
5,5,62,62,58,57,0.9354838709677419,0.5043478260869565,0.655367231638418,0.5080645161290323
6,6,63,63,57,50,0.9047619047619048,0.5327102803738317,0.6705882352941176,0.5555555555555556
7,7,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
8,8,65,65,64,62,0.9846153846153847,0.5079365079365079,0.6701570680628273,0.5153846153846153
9,9,31,31,29,30,0.9354838709677419,0.4915254237288136,0.6444444444444444,0.4838709677419355
10,10,57,57,53,44,0.9298245614035088,0.5463917525773195,0.6883116883116882,0.5789473684210527
11,11,48,48,47,46,0.9791666666666666,0.5053763440860215,0.6666666666666667,0.5104166666666666
12,12,36,36,31,34,0.8611111111111112,0.47692307692307695,0.6138613861386139,0.4583333333333333
13,13,17,17,16,16,0.9411764705882353,0.5,0.6530612244897959,0.5
14,14,77,77,75,68,0.974025974025974,0.5244755244755245,0.6818181818181818,0.5454545454545454
15,15,40,40,40,39,1.0,0.5063291139240507,0.6722689075630253,0.5125
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,999,948,,,,
18,,,,,Micro avg.,0.9578139980824545,0.5130970724191063,0.668227424749164,0.5244487056567594
19,,,,,Macro avg.,0.9502301034657258,0.5238390802520128,0.6742342895779705,0.5385074401082159
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,0,1.0,1.0,1.0,1.0
1,1,30,30,22,13,0.7333333333333333,0.6285714285714286,0.6769230769230768,0.65
2,2,288,288,269,264,0.9340277777777778,0.5046904315196998,0.6552984165651644,0.5086805555555556
3,3,87,87,79,59,0.9080459770114943,0.572463768115942,0.7022222222222222,0.6149425287356322
4,4,94,94,86,80,0.9148936170212766,0.5180722891566265,0.6615384615384615,0.5319148936170213
5,5,62,62,37,42,0.5967741935483871,0.46835443037974683,0.5248226950354611,0.4596774193548387
6,6,63,63,55,28,0.873015873015873,0.6626506024096386,0.7534246575342466,0.7142857142857143
7,7,17,17,12,9,0.7058823529411765,0.5714285714285714,0.6315789473684211,0.5882352941176471
8,8,65,65,56,57,0.8615384615384616,0.49557522123893805,0.6292134831460675,0.49230769230769234
9,9,31,31,23,16,0.7419354838709677,0.5897435897435898,0.6571428571428573,0.6129032258064516
10,10,57,57,44,27,0.7719298245614035,0.6197183098591549,0.6875,0.6491228070175439
11,11,48,48,44,39,0.9166666666666666,0.5301204819277109,0.6717557251908397,0.5520833333333334
12,12,36,36,28,19,0.7777777777777778,0.5957446808510638,0.674698795180723,0.625
13,13,17,17,15,12,0.8823529411764706,0.5555555555555556,0.6818181818181819,0.5882352941176471
14,14,77,77,68,52,0.8831168831168831,0.5666666666666667,0.6903553299492385,0.6038961038961039
15,15,40,40,25,28,0.625,0.4716981132075472,0.5376344086021505,0.4625
16,16,29,29,29,27,1.0,0.5178571428571429,0.6823529411764707,0.5344827586206896
17,total,1043,1043,894,772,,,,
18,,,,,Micro avg.,0.8571428571428571,0.5366146458583433,0.6600221483942413,0.5584851390220518
19,,,,,Macro avg.,0.8309583037269384,0.580524193146413,0.6775458940819753,0.5993098600450513
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,18,10,0.6,0.6428571428571429,0.6206896551724138,0.6333333333333333
2,2,288,288,250,232,0.8680555555555556,0.5186721991701245,0.6493506493506493,0.53125
3,3,87,87,69,39,0.7931034482758621,0.6388888888888888,0.7076923076923076,0.6724137931034483
4,4,94,94,73,64,0.776595744680851,0.5328467153284672,0.6320346320346321,0.5478723404255319
5,5,62,62,23,29,0.3709677419354839,0.4423076923076923,0.4035087719298246,0.45161290322580644
6,6,63,63,52,13,0.8253968253968254,0.8,0.8125,0.8095238095238095
7,7,17,17,12,5,0.7058823529411765,0.7058823529411765,0.7058823529411765,0.7058823529411765
8,8,65,65,48,49,0.7384615384615385,0.4948453608247423,0.5925925925925927,0.49230769230769234
9,9,31,31,15,6,0.4838709677419355,0.7142857142857143,0.5769230769230769,0.6451612903225806
10,10,57,57,34,19,0.5964912280701754,0.6415094339622641,0.6181818181818182,0.631578947368421
11,11,48,48,31,22,0.6458333333333334,0.5849056603773585,0.6138613861386139,0.59375
12,12,36,36,21,14,0.5833333333333334,0.6,0.591549295774648,0.5972222222222222
13,13,17,17,15,7,0.8823529411764706,0.6818181818181818,0.7692307692307693,0.7352941176470589
14,14,77,77,67,37,0.8701298701298701,0.6442307692307693,0.7403314917127072,0.6948051948051948
15,15,40,40,19,21,0.475,0.475,0.47500000000000003,0.475
16,16,29,29,25,24,0.8620689655172413,0.5102040816326531,0.641025641025641,0.5172413793103449
17,total,1043,1043,773,591,,,,
18,,,,,Micro avg.,0.7411313518696069,0.5667155425219942,0.6422933111757374,0.587248322147651
19,,,,,Macro avg.,0.6810319909735092,0.625191423154422,0.6362953592569139,0.6167205515609778
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,10,6,0.3333333333333333,0.625,0.43478260869565216,0.5666666666666667
2,2,288,288,153,102,0.53125,0.6,0.5635359116022098,0.5885416666666666
3,3,87,87,34,13,0.39080459770114945,0.723404255319149,0.5074626865671642,0.6206896551724138
4,4,94,94,51,38,0.5425531914893617,0.5730337078651685,0.5573770491803278,0.5691489361702128
5,5,62,62,16,20,0.25806451612903225,0.4444444444444444,0.32653061224489793,0.46774193548387094
6,6,63,63,42,4,0.6666666666666666,0.9130434782608695,0.7706422018348623,0.8015873015873016
7,7,17,17,7,3,0.4117647058823529,0.7,0.5185185185185185,0.6176470588235294
8,8,65,65,32,32,0.49230769230769234,0.5,0.49612403100775193,0.5
9,9,31,31,10,2,0.3225806451612903,0.8333333333333334,0.4651162790697674,0.6290322580645161
10,10,57,57,32,14,0.5614035087719298,0.6956521739130435,0.6213592233009708,0.6578947368421053
11,11,48,48,25,13,0.5208333333333334,0.6578947368421053,0.5813953488372092,0.625
12,12,36,36,11,8,0.3055555555555556,0.5789473684210527,0.4000000000000001,0.5416666666666666
13,13,17,17,13,3,0.7647058823529411,0.8125,0.787878787878788,0.7941176470588235
14,14,77,77,38,16,0.4935064935064935,0.7037037037037037,0.5801526717557253,0.6428571428571429
15,15,40,40,7,8,0.175,0.4666666666666667,0.2545454545454546,0.4875
16,16,29,29,20,19,0.6896551724137931,0.5128205128205128,0.5882352941176471,0.5172413793103449
17,total,1043,1043,502,301,,,,
18,,,,,Micro avg.,0.4813039309683605,0.6251556662515566,0.5438786565547128,0.5963566634707574
19,,,,,Macro avg.,0.46823442909440727,0.6670849636229441,0.5364896085778595,0.6104313559629566
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,0,1.0,1.0,1.0,1.0
1,1,30,30,25,13,0.8333333333333334,0.6578947368421053,0.7352941176470588,0.7
2,2,288,288,278,265,0.9652777777777778,0.5119705340699816,0.6690734055354994,0.5225694444444444
3,3,87,87,80,66,0.9195402298850575,0.547945205479452,0.686695278969957,0.5804597701149425
4,4,94,94,86,87,0.9148936170212766,0.49710982658959535,0.6441947565543071,0.4946808510638298
5,5,62,62,32,35,0.5161290322580645,0.47761194029850745,0.49612403100775193,0.47580645161290325
6,6,63,63,57,30,0.9047619047619048,0.6551724137931034,0.7599999999999999,0.7142857142857143
7,7,17,17,15,15,0.8823529411764706,0.5,0.6382978723404256,0.5
8,8,65,65,55,57,0.8461538461538461,0.49107142857142855,0.6214689265536724,0.4846153846153846
9,9,31,31,24,21,0.7741935483870968,0.5333333333333333,0.631578947368421,0.5483870967741935
10,10,57,57,42,28,0.7368421052631579,0.6,0.6614173228346457,0.6228070175438597
11,11,48,48,42,39,0.875,0.5185185185185185,0.6511627906976744,0.53125
12,12,36,36,29,19,0.8055555555555556,0.6041666666666666,0.6904761904761905,0.6388888888888888
13,13,17,17,16,10,0.9411764705882353,0.6153846153846154,0.744186046511628,0.6764705882352942
14,14,77,77,72,52,0.935064935064935,0.5806451612903226,0.7164179104477613,0.6298701298701299
15,15,40,40,39,37,0.975,0.5131578947368421,0.6724137931034483,0.525
16,16,29,29,29,27,1.0,0.5178571428571429,0.6823529411764707,0.5344827586206896
17,total,1043,1043,923,801,,,,
18,,,,,Micro avg.,0.8849472674976031,0.5353828306264501,0.6671485363209252,0.5584851390220518
19,,,,,Macro avg.,0.8720750174839242,0.5777552599077421,0.6883031959544065,0.5987984762394278
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,0,1.0,1.0,1.0,1.0
1,1,30,30,22,12,0.7333333333333333,0.6470588235294118,0.6875,0.6666666666666666
2,2,288,288,272,250,0.9444444444444444,0.5210727969348659,0.671604938271605,0.5381944444444444
3,3,87,87,79,59,0.9080459770114943,0.572463768115942,0.7022222222222222,0.6149425287356322
4,4,94,94,83,81,0.8829787234042553,0.5060975609756098,0.6434108527131783,0.5106382978723404
5,5,62,62,23,29,0.3709677419354839,0.4423076923076923,0.4035087719298246,0.45161290322580644
6,6,63,63,54,19,0.8571428571428571,0.7397260273972602,0.7941176470588235,0.7777777777777778
7,7,17,17,13,11,0.7647058823529411,0.5416666666666666,0.6341463414634146,0.5588235294117647
8,8,65,65,51,57,0.7846153846153846,0.4722222222222222,0.5895953757225433,0.45384615384615384
9,9,31,31,20,13,0.6451612903225806,0.6060606060606061,0.625,0.6129032258064516
10,10,57,57,41,24,0.7192982456140351,0.6307692307692307,0.6721311475409837,0.6491228070175439
11,11,48,48,40,27,0.8333333333333334,0.5970149253731343,0.6956521739130433,0.6354166666666666
12,12,36,36,22,14,0.6111111111111112,0.6111111111111112,0.6111111111111112,0.6111111111111112
13,13,17,17,15,8,0.8823529411764706,0.6521739130434783,0.75,0.7058823529411765
14,14,77,77,66,43,0.8571428571428571,0.6055045871559633,0.7096774193548387,0.6493506493506493
15,15,40,40,37,35,0.925,0.5138888888888888,0.6607142857142857,0.525
16,16,29,29,28,27,0.9655172413793104,0.509090909090909,0.6666666666666667,0.5172413793103449
17,total,1043,1043,868,709,,,,
18,,,,,Micro avg.,0.8322147651006712,0.5504121750158529,0.6625954198473283,0.576222435282838
19,,,,,Macro avg.,0.8050089037835232,0.5981311605672349,0.6774740560989728,0.6163841467167371
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,18,9,0.6,0.6666666666666666,0.631578947368421,0.65
2,2,288,288,256,233,0.8888888888888888,0.523517382413088,0.658944658944659,0.5399305555555556
3,3,87,87,73,50,0.8390804597701149,0.5934959349593496,0.6952380952380952,0.632183908045977
4,4,94,94,80,75,0.851063829787234,0.5161290322580645,0.6425702811244981,0.526595744680851
5,5,62,62,15,23,0.24193548387096775,0.39473684210526316,0.3,0.43548387096774194
6,6,63,63,52,9,0.8253968253968254,0.8524590163934426,0.8387096774193549,0.8412698412698413
7,7,17,17,11,7,0.6470588235294118,0.6111111111111112,0.6285714285714287,0.6176470588235294
8,8,65,65,43,48,0.6615384615384615,0.4725274725274725,0.5512820512820512,0.46153846153846156
9,9,31,31,18,8,0.5806451612903226,0.6923076923076923,0.631578947368421,0.6612903225806451
10,10,57,57,36,17,0.631578947368421,0.6792452830188679,0.6545454545454544,0.6666666666666666
11,11,48,48,30,17,0.625,0.6382978723404256,0.631578947368421,0.6354166666666666
12,12,36,36,18,13,0.5,0.5806451612903226,0.537313432835821,0.5694444444444444
13,13,17,17,15,6,0.8823529411764706,0.7142857142857143,0.7894736842105262,0.7647058823529411
14,14,77,77,59,34,0.7662337662337663,0.6344086021505376,0.6941176470588235,0.6623376623376623
15,15,40,40,29,31,0.725,0.48333333333333334,0.58,0.475
16,16,29,29,27,27,0.9310344827586207,0.5,0.6506024096385543,0.5
17,total,1043,1043,781,607,,,,
18,,,,,Micro avg.,0.7488015340364333,0.5626801152737753,0.6425339366515836,0.5834132310642378
19,,,,,Macro avg.,0.6880475336240887,0.6207745363036088,0.6342807252730114,0.6111477109371168
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
1,1,30,30,11,7,0.36666666666666664,0.6111111111111112,0.45833333333333326,0.5666666666666667
2,2,288,288,233,195,0.8090277777777778,0.544392523364486,0.6508379888268156,0.5659722222222222
3,3,87,87,53,27,0.6091954022988506,0.6625,0.6347305389221558,0.6494252873563219
4,4,94,94,70,64,0.7446808510638298,0.5223880597014925,0.6140350877192982,0.5319148936170213
5,5,62,62,15,20,0.24193548387096775,0.42857142857142855,0.3092783505154639,0.4596774193548387
6,6,63,63,45,3,0.7142857142857143,0.9375,0.8108108108108109,0.8333333333333334
7,7,17,17,11,3,0.6470588235294118,0.7857142857142857,0.7096774193548386,0.7352941176470589
8,8,65,65,31,36,0.47692307692307695,0.4626865671641791,0.4696969696969697,0.46153846153846156
9,9,31,31,16,5,0.5161290322580645,0.7619047619047619,0.6153846153846153,0.6774193548387096
10,10,57,57,32,14,0.5614035087719298,0.6956521739130435,0.6213592233009708,0.6578947368421053
11,11,48,48,16,11,0.3333333333333333,0.5925925925925926,0.4266666666666667,0.5520833333333334
12,12,36,36,12,11,0.3333333333333333,0.5217391304347826,0.4067796610169491,0.5138888888888888
13,13,17,17,15,4,0.8823529411764706,0.7894736842105263,0.8333333333333333,0.8235294117647058
14,14,77,77,46,21,0.5974025974025974,0.6865671641791045,0.638888888888889,0.6623376623376623
15,15,40,40,23,23,0.575,0.5,0.5348837209302325,0.5
16,16,29,29,23,21,0.7931034482758621,0.5227272727272727,0.6301369863013699,0.5344827586206896
17,total,1043,1043,653,465,,,,
18,,,,,Micro avg.,0.62607861936721,0.5840787119856887,0.6043498380379455,0.590124640460211
19,,,,,Macro avg.,0.5706959994686992,0.6485600444464158,0.590088250686434,0.6162034440212953