Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-0_Queries-1_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-0_Queries-1_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
028633f
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,4,0,0.013888888888888888,1.0,0.0273972602739726,0.5069444444444444
3,3,87,87,6,1,0.06896551724137931,0.8571428571428571,0.1276595744680851,0.5287356321839081
4,4,94,94,1,1,0.010638297872340425,0.5,0.020833333333333336,0.5
5,5,62,62,0,0,0.0,0.0,0.0,0.5
6,6,63,63,0,0,0.0,0.0,0.0,0.5
7,7,17,17,2,1,0.11764705882352941,0.6666666666666666,0.2,0.5294117647058824
8,8,65,65,5,2,0.07692307692307693,0.7142857142857143,0.1388888888888889,0.5230769230769231
9,9,31,31,2,1,0.06451612903225806,0.6666666666666666,0.1176470588235294,0.5161290322580645
10,10,57,57,5,1,0.08771929824561403,0.8333333333333334,0.15873015873015872,0.5350877192982456
11,11,48,48,16,7,0.3333333333333333,0.6956521739130435,0.4507042253521127,0.59375
12,12,36,36,4,3,0.1111111111111111,0.5714285714285714,0.18604651162790697,0.5138888888888888
13,13,17,17,8,3,0.47058823529411764,0.7272727272727273,0.5714285714285714,0.6470588235294118
14,14,77,77,0,0,0.0,0.0,0.0,0.5
15,15,40,40,28,30,0.7,0.4827586206896552,0.5714285714285714,0.475
16,16,29,29,0,0,0.0,0.0,0.0,0.5
17,total,1043,1043,81,50,,,,
18,,,,,Micro avg.,0.07766059443911794,0.6183206106870229,0.13798977853492334,0.5148609779482263
19,,,,,Macro avg.,0.12090182039797938,0.45383572537642564,0.15122142084441945,0.5217107781403394
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,2,1.0,0.5,0.6666666666666666,0.5
1,1,30,30,30,30,1.0,0.5,0.6666666666666666,0.5
2,2,288,288,276,274,0.9583333333333334,0.5018181818181818,0.6587112171837709,0.5034722222222222
3,3,87,87,86,80,0.9885057471264368,0.5180722891566265,0.6798418972332015,0.5344827586206896
4,4,94,94,92,91,0.9787234042553191,0.5027322404371585,0.6642599277978339,0.5053191489361702
5,5,62,62,53,57,0.8548387096774194,0.4818181818181818,0.6162790697674418,0.46774193548387094
6,6,63,63,63,59,1.0,0.5163934426229508,0.6810810810810811,0.5317460317460317
7,7,17,17,17,17,1.0,0.5,0.6666666666666666,0.5
8,8,65,65,65,65,1.0,0.5,0.6666666666666666,0.5
9,9,31,31,31,31,1.0,0.5,0.6666666666666666,0.5
10,10,57,57,56,56,0.9824561403508771,0.5,0.6627218934911242,0.5
11,11,48,48,48,46,1.0,0.5106382978723404,0.676056338028169,0.5208333333333334
12,12,36,36,35,36,0.9722222222222222,0.49295774647887325,0.6542056074766356,0.4861111111111111
13,13,17,17,17,15,1.0,0.53125,0.6938775510204082,0.5588235294117647
14,14,77,77,77,76,1.0,0.5032679738562091,0.6695652173913044,0.5064935064935064
15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
17,total,1043,1043,1017,1003,,,,
18,,,,,Micro avg.,0.975071907957814,0.5034653465346535,0.6640548481880509,0.5067114093959731
19,,,,,Macro avg.,0.9844164445273887,0.5039835461108872,0.6665304944189491,0.5077802915687673
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,2,0,1.0,1.0,1.0,1.0
1,1,30,30,28,23,0.9333333333333333,0.5490196078431373,0.6913580246913581,0.5833333333333334
2,2,288,288,191,195,0.6631944444444444,0.4948186528497409,0.5667655786350149,0.4930555555555556
3,3,87,87,83,68,0.9540229885057471,0.5496688741721855,0.6974789915966386,0.5862068965517241
4,4,94,94,75,79,0.7978723404255319,0.487012987012987,0.6048387096774194,0.4787234042553192
5,5,62,62,42,46,0.6774193548387096,0.4772727272727273,0.5599999999999999,0.46774193548387094
6,6,63,63,58,37,0.9206349206349206,0.6105263157894737,0.7341772151898734,0.6666666666666666
7,7,17,17,17,15,1.0,0.53125,0.6938775510204082,0.5588235294117647
8,8,65,65,64,61,0.9846153846153847,0.512,0.6736842105263159,0.5230769230769231
9,9,31,31,31,31,1.0,0.5,0.6666666666666666,0.5
10,10,57,57,55,51,0.9649122807017544,0.5188679245283019,0.674846625766871,0.5350877192982456
11,11,48,48,45,45,0.9375,0.5,0.6521739130434783,0.5
12,12,36,36,35,36,0.9722222222222222,0.49295774647887325,0.6542056074766356,0.4861111111111111
13,13,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
14,14,77,77,65,66,0.8441558441558441,0.4961832061068702,0.625,0.4935064935064935
15,15,40,40,38,40,0.95,0.48717948717948717,0.6440677966101694,0.475
16,16,29,29,25,22,0.8620689655172413,0.5319148936170213,0.6578947368421053,0.5517241379310345
17,total,1043,1043,870,830,,,,
18,,,,,Micro avg.,0.8341323106423778,0.5117647058823529,0.6343419613561793,0.5191754554170661
19,,,,,Macro avg.,0.9060663852931395,0.5444000855946394,0.6743354290829189,0.5546158512287014
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,12,6,0.4,0.6666666666666666,0.5,0.6
2,2,288,288,30,25,0.10416666666666667,0.5454545454545454,0.17492711370262393,0.5086805555555556
3,3,87,87,57,22,0.6551724137931034,0.7215189873417721,0.6867469879518071,0.7011494252873564
4,4,94,94,23,9,0.24468085106382978,0.71875,0.3650793650793651,0.574468085106383
5,5,62,62,8,6,0.12903225806451613,0.5714285714285714,0.2105263157894737,0.5161290322580645
6,6,63,63,15,7,0.23809523809523808,0.6818181818181818,0.3529411764705882,0.5634920634920635
7,7,17,17,13,10,0.7647058823529411,0.5652173913043478,0.65,0.5882352941176471
8,8,65,65,52,42,0.8,0.5531914893617021,0.6540880503144655,0.5769230769230769
9,9,31,31,24,21,0.7741935483870968,0.5333333333333333,0.631578947368421,0.5483870967741935
10,10,57,57,45,30,0.7894736842105263,0.6,0.6818181818181819,0.631578947368421
11,11,48,48,39,35,0.8125,0.527027027027027,0.6393442622950819,0.5416666666666666
12,12,36,36,28,29,0.7777777777777778,0.49122807017543857,0.6021505376344085,0.4861111111111111
13,13,17,17,12,11,0.7058823529411765,0.5217391304347826,0.6,0.5294117647058824
14,14,77,77,16,14,0.2077922077922078,0.5333333333333333,0.2990654205607477,0.512987012987013
15,15,40,40,31,34,0.775,0.47692307692307695,0.5904761904761905,0.4625
16,16,29,29,3,2,0.10344827586206896,0.6,0.17647058823529413,0.5172413793103449
17,total,1043,1043,408,303,,,,
18,,,,,Micro avg.,0.3911792905081496,0.5738396624472574,0.46522234891676173,0.5503355704697986
19,,,,,Macro avg.,0.48717183276512654,0.5475076355648693,0.4597184198645087,0.5505271477449282
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,0,0.03333333333333333,1.0,0.06451612903225806,0.5166666666666667
2,2,288,288,1,1,0.003472222222222222,0.5,0.006896551724137931,0.5
3,3,87,87,6,2,0.06896551724137931,0.75,0.12631578947368421,0.5229885057471264
4,4,94,94,2,0,0.02127659574468085,1.0,0.04166666666666667,0.5106382978723404
5,5,62,62,0,0,0.0,0.0,0.0,0.5
6,6,63,63,0,0,0.0,0.0,0.0,0.5
7,7,17,17,1,0,0.058823529411764705,1.0,0.1111111111111111,0.5294117647058824
8,8,65,65,4,2,0.06153846153846154,0.6666666666666666,0.11267605633802817,0.5153846153846153
9,9,31,31,2,0,0.06451612903225806,1.0,0.12121212121212122,0.532258064516129
10,10,57,57,7,1,0.12280701754385964,0.875,0.2153846153846154,0.5526315789473685
11,11,48,48,17,6,0.3541666666666667,0.7391304347826086,0.4788732394366197,0.6145833333333334
12,12,36,36,3,1,0.08333333333333333,0.75,0.15,0.5277777777777778
13,13,17,17,3,3,0.17647058823529413,0.5,0.2608695652173913,0.5
14,14,77,77,2,1,0.025974025974025976,0.6666666666666666,0.05,0.5064935064935064
15,15,40,40,8,10,0.2,0.4444444444444444,0.2758620689655173,0.475
16,16,29,29,0,0,0.0,0.0,0.0,0.5
17,total,1043,1043,57,27,,,,
18,,,,,Micro avg.,0.05465004793863854,0.6785714285714286,0.10115350488021295,0.5143815915627996
19,,,,,Macro avg.,0.07498102472219292,0.5818769536800227,0.11855199497424417,0.5178725947908673
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,20,14,0.6666666666666666,0.5882352941176471,0.625,0.6
2,2,288,288,76,68,0.2638888888888889,0.5277777777777778,0.35185185185185186,0.5138888888888888
3,3,87,87,67,35,0.7701149425287356,0.6568627450980392,0.708994708994709,0.6839080459770115
4,4,94,94,36,43,0.3829787234042553,0.45569620253164556,0.4161849710982659,0.4627659574468085
5,5,62,62,13,13,0.20967741935483872,0.5,0.29545454545454547,0.5
6,6,63,63,33,11,0.5238095238095238,0.75,0.6168224299065421,0.6746031746031746
7,7,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
8,8,65,65,62,59,0.9538461538461539,0.512396694214876,0.6666666666666666,0.5230769230769231
9,9,31,31,29,30,0.9354838709677419,0.4915254237288136,0.6444444444444444,0.4838709677419355
10,10,57,57,56,46,0.9824561403508771,0.5490196078431373,0.7044025157232705,0.5877192982456141
11,11,48,48,43,42,0.8958333333333334,0.5058823529411764,0.6466165413533834,0.5104166666666666
12,12,36,36,35,35,0.9722222222222222,0.5,0.660377358490566,0.5
13,13,17,17,17,13,1.0,0.5666666666666667,0.7234042553191489,0.6176470588235294
14,14,77,77,42,37,0.5454545454545454,0.5316455696202531,0.5384615384615384,0.5324675324675324
15,15,40,40,39,40,0.975,0.4936708860759494,0.6554621848739496,0.4875
16,16,29,29,10,12,0.3448275862068966,0.45454545454545453,0.39215686274509803,0.46551724137931033
17,total,1043,1043,594,513,,,,
18,,,,,Micro avg.,0.5695110258868649,0.5365853658536586,0.5525581395348838,0.538830297219559
19,,,,,Macro avg.,0.6684374404484067,0.5058855122011472,0.5478216201206263,0.539576089413134
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,13,9,0.43333333333333335,0.5909090909090909,0.5,0.5666666666666667
2,2,288,288,54,37,0.1875,0.5934065934065934,0.28496042216358836,0.5295138888888888
3,3,87,87,62,29,0.7126436781609196,0.6813186813186813,0.6966292134831461,0.6896551724137931
4,4,94,94,28,28,0.2978723404255319,0.5,0.3733333333333333,0.5
5,5,62,62,6,5,0.0967741935483871,0.5454545454545454,0.1643835616438356,0.5080645161290323
6,6,63,63,19,4,0.30158730158730157,0.8260869565217391,0.4418604651162791,0.6190476190476191
7,7,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
8,8,65,65,61,53,0.9384615384615385,0.5350877192982456,0.6815642458100559,0.5615384615384615
9,9,31,31,23,27,0.7419354838709677,0.46,0.5679012345679013,0.43548387096774194
10,10,57,57,53,39,0.9298245614035088,0.5760869565217391,0.7114093959731544,0.6228070175438597
11,11,48,48,41,38,0.8541666666666666,0.5189873417721519,0.6456692913385826,0.53125
12,12,36,36,33,33,0.9166666666666666,0.5,0.6470588235294118,0.5
13,13,17,17,17,11,1.0,0.6071428571428571,0.7555555555555554,0.6764705882352942
14,14,77,77,27,21,0.35064935064935066,0.5625,0.432,0.538961038961039
15,15,40,40,39,40,0.975,0.4936708860759494,0.6554621848739496,0.4875
16,16,29,29,5,6,0.1724137931034483,0.45454545454545453,0.25000000000000006,0.4827586206896552
17,total,1043,1043,497,395,,,,
18,,,,,Micro avg.,0.47651006711409394,0.5571748878923767,0.5136950904392765,0.5488974113135187
19,,,,,Macro avg.,0.5794120810862268,0.5271368303073597,0.49849731729738,0.545831130928702
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,8,3,0.26666666666666666,0.7272727272727273,0.3902439024390244,0.5833333333333334
2,2,288,288,32,17,0.1111111111111111,0.6530612244897959,0.18991097922848665,0.5260416666666666
3,3,87,87,51,18,0.5862068965517241,0.7391304347826086,0.6538461538461539,0.6896551724137931
4,4,94,94,19,13,0.20212765957446807,0.59375,0.30158730158730157,0.5319148936170213
5,5,62,62,2,1,0.03225806451612903,0.6666666666666666,0.06153846153846154,0.5080645161290323
6,6,63,63,10,1,0.15873015873015872,0.9090909090909091,0.2702702702702703,0.5714285714285714
7,7,17,17,15,13,0.8823529411764706,0.5357142857142857,0.6666666666666667,0.5588235294117647
8,8,65,65,56,50,0.8615384615384616,0.5283018867924528,0.6549707602339182,0.5461538461538461
9,9,31,31,18,17,0.5806451612903226,0.5142857142857142,0.5454545454545455,0.5161290322580645
10,10,57,57,48,28,0.8421052631578947,0.631578947368421,0.7218045112781954,0.6754385964912281
11,11,48,48,37,29,0.7708333333333334,0.5606060606060606,0.6491228070175438,0.5833333333333334
12,12,36,36,29,27,0.8055555555555556,0.5178571428571429,0.6304347826086957,0.5277777777777778
13,13,17,17,17,9,1.0,0.6538461538461539,0.7906976744186047,0.7352941176470589
14,14,77,77,12,6,0.15584415584415584,0.6666666666666666,0.25263157894736843,0.538961038961039
15,15,40,40,39,39,0.975,0.5,0.6610169491525423,0.5
16,16,29,29,3,2,0.10344827586206896,0.6,0.17647058823529413,0.5172413793103449
17,total,1043,1043,396,273,,,,
18,,,,,Micro avg.,0.3796740172579099,0.5919282511210763,0.46261682242990654,0.5589645254074784
19,,,,,Macro avg.,0.49026021793579533,0.5881075776729179,0.44803929017194544,0.5652700473489926
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,6,1,0.2,0.8571428571428571,0.32432432432432434,0.5833333333333334
2,2,288,288,16,8,0.05555555555555555,0.6666666666666666,0.10256410256410256,0.5138888888888888
3,3,87,87,42,10,0.4827586206896552,0.8076923076923077,0.60431654676259,0.6839080459770115
4,4,94,94,10,5,0.10638297872340426,0.6666666666666666,0.18348623853211007,0.526595744680851
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
6,6,63,63,2,0,0.031746031746031744,1.0,0.06153846153846154,0.5158730158730159
7,7,17,17,11,8,0.6470588235294118,0.5789473684210527,0.6111111111111113,0.5882352941176471
8,8,65,65,40,32,0.6153846153846154,0.5555555555555556,0.583941605839416,0.5615384615384615
9,9,31,31,11,13,0.3548387096774194,0.4583333333333333,0.39999999999999997,0.46774193548387094
10,10,57,57,30,14,0.5263157894736842,0.6818181818181818,0.594059405940594,0.6403508771929824
11,11,48,48,31,19,0.6458333333333334,0.62,0.6326530612244898,0.625
12,12,36,36,21,17,0.5833333333333334,0.5526315789473685,0.5675675675675677,0.5555555555555556
13,13,17,17,13,7,0.7647058823529411,0.65,0.7027027027027027,0.6764705882352942
14,14,77,77,6,2,0.07792207792207792,0.75,0.1411764705882353,0.525974025974026
15,15,40,40,38,38,0.95,0.5,0.6551724137931034,0.5
16,16,29,29,0,0,0.0,0.0,0.0,0.5
17,total,1043,1043,278,174,,,,
18,,,,,Micro avg.,0.2665388302972196,0.6150442477876106,0.37190635451505016,0.549856184084372
19,,,,,Macro avg.,0.35635086964585455,0.6085561480143523,0.3644917673079318,0.5572076637047041