Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-0_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-0_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
ed5ad27
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,15,9,0.052083333333333336,0.625,0.09615384615384616,0.5104166666666666
3,3,87,87,5,4,0.05747126436781609,0.5555555555555556,0.10416666666666666,0.5057471264367817
4,4,94,94,23,8,0.24468085106382978,0.7419354838709677,0.368,0.5797872340425532
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
6,6,63,63,1,0,0.015873015873015872,1.0,0.03125,0.5079365079365079
7,7,17,17,1,0,0.058823529411764705,1.0,0.1111111111111111,0.5294117647058824
8,8,65,65,2,2,0.03076923076923077,0.5,0.057971014492753624,0.5
9,9,31,31,3,2,0.0967741935483871,0.6,0.16666666666666666,0.5161290322580645
10,10,57,57,8,4,0.14035087719298245,0.6666666666666666,0.2318840579710145,0.5350877192982456
11,11,48,48,31,21,0.6458333333333334,0.5961538461538461,0.62,0.6041666666666666
12,12,36,36,14,12,0.3888888888888889,0.5384615384615384,0.45161290322580644,0.5277777777777778
13,13,17,17,9,7,0.5294117647058824,0.5625,0.5454545454545455,0.5588235294117647
14,14,77,77,15,10,0.19480519480519481,0.6,0.2941176470588235,0.5324675324675324
15,15,40,40,0,0,0.0,0.0,0.0,0.5
16,16,29,29,11,15,0.3793103448275862,0.4230769230769231,0.4,0.43103448275862066
17,total,1043,1043,139,94,,,,
18,,,,,Micro avg.,0.1332694151486098,0.5965665236051502,0.21786833855799376,0.5215723873441994
19,,,,,Macro avg.,0.16771793261054765,0.5534911772814999,0.2064784994439568,0.5204029739150645
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,2,2,0.06666666666666667,0.5,0.11764705882352941,0.5
2,2,288,288,107,106,0.3715277777777778,0.5023474178403756,0.42714570858283435,0.5017361111111112
3,3,87,87,81,66,0.9310344827586207,0.5510204081632653,0.6923076923076924,0.5862068965517241
4,4,94,94,89,92,0.9468085106382979,0.49171270718232046,0.6472727272727273,0.48404255319148937
5,5,62,62,41,34,0.6612903225806451,0.5466666666666666,0.5985401459854014,0.5564516129032258
6,6,63,63,53,35,0.8412698412698413,0.6022727272727273,0.7019867549668876,0.6428571428571429
7,7,17,17,13,9,0.7647058823529411,0.5909090909090909,0.6666666666666667,0.6176470588235294
8,8,65,65,56,45,0.8615384615384616,0.5544554455445545,0.674698795180723,0.5846153846153846
9,9,31,31,28,31,0.9032258064516129,0.4745762711864407,0.6222222222222222,0.45161290322580644
10,10,57,57,56,49,0.9824561403508771,0.5333333333333333,0.691358024691358,0.5614035087719298
11,11,48,48,46,48,0.9583333333333334,0.48936170212765956,0.6478873239436619,0.4791666666666667
12,12,36,36,34,34,0.9444444444444444,0.5,0.6538461538461539,0.5
13,13,17,17,17,17,1.0,0.5,0.6666666666666666,0.5
14,14,77,77,64,58,0.8311688311688312,0.5245901639344263,0.6432160804020102,0.538961038961039
15,15,40,40,5,12,0.125,0.29411764705882354,0.17543859649122806,0.4125
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,721,666,,,,
18,,,,,Micro avg.,0.6912751677852349,0.5198269646719539,0.5934156378600824,0.5263662511984659
19,,,,,Macro avg.,0.7170276765489619,0.48024326535554374,0.5471364248647602,0.5255554268817291
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,1,0.03333333333333333,0.5,0.0625,0.5
2,2,288,288,63,48,0.21875,0.5675675675675675,0.3157894736842105,0.5260416666666666
3,3,87,87,66,48,0.7586206896551724,0.5789473684210527,0.6567164179104478,0.603448275862069
4,4,94,94,73,69,0.776595744680851,0.5140845070422535,0.6186440677966101,0.5212765957446809
5,5,62,62,15,10,0.24193548387096775,0.6,0.3448275862068966,0.5403225806451613
6,6,63,63,43,14,0.6825396825396826,0.7543859649122807,0.7166666666666668,0.7301587301587301
7,7,17,17,12,7,0.7058823529411765,0.631578947368421,0.6666666666666667,0.6470588235294118
8,8,65,65,48,36,0.7384615384615385,0.5714285714285714,0.6442953020134228,0.5923076923076923
9,9,31,31,25,31,0.8064516129032258,0.44642857142857145,0.5747126436781609,0.4032258064516129
10,10,57,57,47,31,0.8245614035087719,0.6025641025641025,0.6962962962962963,0.6403508771929824
11,11,48,48,43,43,0.8958333333333334,0.5,0.6417910447761194,0.5
12,12,36,36,31,32,0.8611111111111112,0.49206349206349204,0.6262626262626263,0.4861111111111111
13,13,17,17,16,14,0.9411764705882353,0.5333333333333333,0.6808510638297872,0.5588235294117647
14,14,77,77,52,50,0.6753246753246753,0.5098039215686274,0.5810055865921787,0.512987012987013
15,15,40,40,1,4,0.025,0.2,0.04444444444444445,0.4625
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,565,466,,,,
18,,,,,Micro avg.,0.5417066155321189,0.5480116391852571,0.5448408871745419,0.5474592521572388
19,,,,,Macro avg.,0.5991516136618869,0.5006446045601667,0.5026993230279821,0.5436384753752495
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,36,20,0.125,0.6428571428571429,0.20930232558139536,0.5277777777777778
3,3,87,87,34,24,0.39080459770114945,0.5862068965517241,0.46896551724137925,0.5574712643678161
4,4,94,94,49,29,0.5212765957446809,0.6282051282051282,0.5697674418604651,0.6063829787234043
5,5,62,62,3,1,0.04838709677419355,0.75,0.0909090909090909,0.5161290322580645
6,6,63,63,25,5,0.3968253968253968,0.8333333333333334,0.5376344086021505,0.6587301587301587
7,7,17,17,5,1,0.29411764705882354,0.8333333333333334,0.4347826086956522,0.6176470588235294
8,8,65,65,33,16,0.5076923076923077,0.673469387755102,0.5789473684210525,0.6307692307692307
9,9,31,31,19,23,0.6129032258064516,0.4523809523809524,0.5205479452054794,0.43548387096774194
10,10,57,57,39,19,0.6842105263157895,0.6724137931034483,0.6782608695652174,0.6754385964912281
11,11,48,48,41,37,0.8541666666666666,0.5256410256410257,0.6507936507936508,0.5416666666666666
12,12,36,36,27,24,0.75,0.5294117647058824,0.6206896551724139,0.5416666666666666
13,13,17,17,14,12,0.8235294117647058,0.5384615384615384,0.6511627906976744,0.5588235294117647
14,14,77,77,40,37,0.5194805194805194,0.5194805194805194,0.5194805194805194,0.5194805194805194
15,15,40,40,1,1,0.025,0.5,0.047619047619047616,0.5
16,16,29,29,26,27,0.896551724137931,0.49056603773584906,0.6341463414634146,0.4827586206896552
17,total,1043,1043,392,276,,,,
18,,,,,Micro avg.,0.37583892617449666,0.5868263473053892,0.4582115721800117,0.5556088207094918
19,,,,,Macro avg.,0.43823210093933035,0.5397506384438223,0.42429468125344727,0.5511897630484837
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,17,12,0.059027777777777776,0.5862068965517241,0.10725552050473185,0.5086805555555556
3,3,87,87,11,7,0.12643678160919541,0.6111111111111112,0.20952380952380958,0.5229885057471264
4,4,94,94,23,8,0.24468085106382978,0.7419354838709677,0.368,0.5797872340425532
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
6,6,63,63,10,1,0.15873015873015872,0.9090909090909091,0.2702702702702703,0.5714285714285714
7,7,17,17,3,0,0.17647058823529413,1.0,0.3,0.5882352941176471
8,8,65,65,15,6,0.23076923076923078,0.7142857142857143,0.34883720930232565,0.5692307692307692
9,9,31,31,6,1,0.1935483870967742,0.8571428571428571,0.3157894736842105,0.5806451612903226
10,10,57,57,19,9,0.3333333333333333,0.6785714285714286,0.4470588235294118,0.5877192982456141
11,11,48,48,33,23,0.6875,0.5892857142857143,0.6346153846153846,0.6041666666666666
12,12,36,36,23,21,0.6388888888888888,0.5227272727272727,0.575,0.5277777777777778
13,13,17,17,13,10,0.7647058823529411,0.5652173913043478,0.65,0.5882352941176471
14,14,77,77,23,16,0.2987012987012987,0.5897435897435898,0.39655172413793105,0.5454545454545454
15,15,40,40,1,1,0.025,0.5,0.047619047619047616,0.5
16,16,29,29,19,24,0.6551724137931034,0.4418604651162791,0.5277777777777777,0.41379310344827586
17,total,1043,1043,217,139,,,,
18,,,,,Micro avg.,0.2080536912751678,0.6095505617977528,0.31022158684774837,0.537392138063279
19,,,,,Macro avg.,0.2711232132123465,0.6063046372824656,0.30764971015946657,0.5409533701913002
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,2,0.03333333333333333,0.3333333333333333,0.0606060606060606,0.48333333333333334
2,2,288,288,80,67,0.2777777777777778,0.54421768707483,0.367816091954023,0.5225694444444444
3,3,87,87,75,57,0.8620689655172413,0.5681818181818182,0.6849315068493151,0.603448275862069
4,4,94,94,87,90,0.925531914893617,0.4915254237288136,0.6420664206642066,0.48404255319148937
5,5,62,62,14,11,0.22580645161290322,0.56,0.3218390804597701,0.5241935483870968
6,6,63,63,45,18,0.7142857142857143,0.7142857142857143,0.7142857142857143,0.7142857142857143
7,7,17,17,12,9,0.7058823529411765,0.5714285714285714,0.6315789473684211,0.5882352941176471
8,8,65,65,52,41,0.8,0.5591397849462365,0.6582278481012658,0.5846153846153846
9,9,31,31,28,31,0.9032258064516129,0.4745762711864407,0.6222222222222222,0.45161290322580644
10,10,57,57,52,36,0.9122807017543859,0.5909090909090909,0.7172413793103448,0.6403508771929824
11,11,48,48,45,48,0.9375,0.4838709677419355,0.6382978723404255,0.46875
12,12,36,36,34,33,0.9444444444444444,0.5074626865671642,0.6601941747572816,0.5138888888888888
13,13,17,17,17,14,1.0,0.5483870967741935,0.7083333333333333,0.5882352941176471
14,14,77,77,62,53,0.8051948051948052,0.5391304347826087,0.6458333333333333,0.5584415584415584
15,15,40,40,3,3,0.075,0.5,0.13043478260869565,0.5
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,636,541,,,,
18,,,,,Micro avg.,0.6097794822627037,0.54035683942226,0.572972972972973,0.5455417066155321
19,,,,,Macro avg.,0.6542548393062947,0.49971887122148895,0.5222545513438572,0.5437202617302592
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,61,39,0.21180555555555555,0.61,0.31443298969072164,0.5381944444444444
3,3,87,87,61,47,0.7011494252873564,0.5648148148148148,0.6256410256410256,0.5804597701149425
4,4,94,94,85,84,0.9042553191489362,0.5029585798816568,0.6463878326996197,0.5053191489361702
5,5,62,62,11,4,0.1774193548387097,0.7333333333333333,0.2857142857142857,0.5564516129032258
6,6,63,63,37,8,0.5873015873015873,0.8222222222222222,0.6851851851851852,0.7301587301587301
7,7,17,17,11,5,0.6470588235294118,0.6875,0.6666666666666667,0.6764705882352942
8,8,65,65,48,31,0.7384615384615385,0.6075949367088608,0.6666666666666666,0.6307692307692307
9,9,31,31,28,29,0.9032258064516129,0.49122807017543857,0.6363636363636364,0.4838709677419355
10,10,57,57,45,33,0.7894736842105263,0.5769230769230769,0.6666666666666666,0.6052631578947368
11,11,48,48,43,44,0.8958333333333334,0.4942528735632184,0.6370370370370371,0.4895833333333333
12,12,36,36,34,32,0.9444444444444444,0.5151515151515151,0.6666666666666666,0.5277777777777778
13,13,17,17,17,13,1.0,0.5666666666666667,0.7234042553191489,0.6176470588235294
14,14,77,77,56,46,0.7272727272727273,0.5490196078431373,0.6256983240223464,0.564935064935065
15,15,40,40,1,3,0.025,0.25,0.045454545454545456,0.475
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,567,446,,,,
18,,,,,Micro avg.,0.5436241610738255,0.5597235932872655,0.551556420233463,0.5580057526366251
19,,,,,Macro avg.,0.6031000941079847,0.4988492721828531,0.5039061404967873,0.5587730744340448
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,49,24,0.1701388888888889,0.6712328767123288,0.27146814404432135,0.5434027777777778
3,3,87,87,52,39,0.5977011494252874,0.5714285714285714,0.5842696629213484,0.5747126436781609
4,4,94,94,69,64,0.7340425531914894,0.518796992481203,0.6079295154185022,0.526595744680851
5,5,62,62,4,2,0.06451612903225806,0.6666666666666666,0.1176470588235294,0.5161290322580645
6,6,63,63,23,2,0.36507936507936506,0.92,0.5227272727272727,0.6666666666666666
7,7,17,17,8,2,0.47058823529411764,0.8,0.5925925925925927,0.6764705882352942
8,8,65,65,32,21,0.49230769230769234,0.6037735849056604,0.5423728813559323,0.5846153846153846
9,9,31,31,26,27,0.8387096774193549,0.49056603773584906,0.6190476190476191,0.4838709677419355
10,10,57,57,37,24,0.6491228070175439,0.6065573770491803,0.6271186440677966,0.6140350877192983
11,11,48,48,41,40,0.8541666666666666,0.5061728395061729,0.6356589147286822,0.5104166666666666
12,12,36,36,31,28,0.8611111111111112,0.5254237288135594,0.6526315789473686,0.5416666666666666
13,13,17,17,16,12,0.9411764705882353,0.5714285714285714,0.7111111111111111,0.6176470588235294
14,14,77,77,46,37,0.5974025974025974,0.5542168674698795,0.575,0.5584415584415584
15,15,40,40,1,1,0.025,0.5,0.047619047619047616,0.5
16,16,29,29,25,28,0.8620689655172413,0.4716981132075472,0.6097560975609757,0.4482758620689655
17,total,1043,1043,460,351,,,,
18,,,,,Micro avg.,0.44103547459252157,0.5672009864364982,0.4962243797195253,0.5522531160115053
19,,,,,Macro avg.,0.501360724055403,0.5281154251414817,0.4539382435862412,0.5507615709435776
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,29,18,0.10069444444444445,0.6170212765957447,0.17313432835820897,0.5190972222222222
3,3,87,87,28,23,0.3218390804597701,0.5490196078431373,0.40579710144927533,0.5287356321839081
4,4,94,94,56,34,0.5957446808510638,0.6222222222222222,0.608695652173913,0.6170212765957447
5,5,62,62,2,0,0.03225806451612903,1.0,0.0625,0.5161290322580645
6,6,63,63,8,1,0.12698412698412698,0.8888888888888888,0.2222222222222222,0.5555555555555556
7,7,17,17,5,0,0.29411764705882354,1.0,0.45454545454545453,0.6470588235294118
8,8,65,65,18,10,0.27692307692307694,0.6428571428571429,0.38709677419354843,0.5615384615384615
9,9,31,31,18,18,0.5806451612903226,0.5,0.537313432835821,0.5
10,10,57,57,25,17,0.43859649122807015,0.5952380952380952,0.505050505050505,0.5701754385964912
11,11,48,48,37,29,0.7708333333333334,0.5606060606060606,0.6491228070175438,0.5833333333333334
12,12,36,36,25,23,0.6944444444444444,0.5208333333333334,0.5952380952380953,0.5277777777777778
13,13,17,17,14,11,0.8235294117647058,0.56,0.6666666666666666,0.5882352941176471
14,14,77,77,31,20,0.4025974025974026,0.6078431372549019,0.48437499999999994,0.5714285714285714
15,15,40,40,1,0,0.025,1.0,0.04878048780487806,0.5125
16,16,29,29,23,26,0.7931034482758621,0.46938775510204084,0.5897435897435898,0.4482758620689655
17,total,1043,1043,320,230,,,,
18,,,,,Micro avg.,0.3068072866730585,0.5818181818181818,0.40175768989328314,0.5431447746883988
19,,,,,Macro avg.,0.3692535773042104,0.5961127952906804,0.37589894807645424,0.543933075365068