Zero_Shot_Classifier_Tester_for_TP_FP / Reports /report-Model-1_Queries-0_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
gorkaartola's picture
Upload report-Model-1_Queries-0_Prompt-3_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
4bca2dd
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,1,0,0.03333333333333333,1.0,0.06451612903225806,0.5166666666666667
2,2,288,288,183,127,0.6354166666666666,0.5903225806451613,0.6120401337792643,0.5972222222222222
3,3,87,87,28,7,0.3218390804597701,0.8,0.4590163934426229,0.6206896551724138
4,4,94,94,3,0,0.031914893617021274,1.0,0.06185567010309278,0.5159574468085106
5,5,62,62,9,9,0.14516129032258066,0.5,0.22500000000000003,0.5
6,6,63,63,27,1,0.42857142857142855,0.9642857142857143,0.5934065934065934,0.7063492063492064
7,7,17,17,9,4,0.5294117647058824,0.6923076923076923,0.5999999999999999,0.6470588235294118
8,8,65,65,1,1,0.015384615384615385,0.5,0.029850746268656723,0.5
9,9,31,31,11,6,0.3548387096774194,0.6470588235294118,0.4583333333333333,0.5806451612903226
10,10,57,57,5,4,0.08771929824561403,0.5555555555555556,0.1515151515151515,0.5087719298245614
11,11,48,48,22,17,0.4583333333333333,0.5641025641025641,0.5057471264367815,0.5520833333333334
12,12,36,36,0,1,0.0,0.0,0.0,0.4861111111111111
13,13,17,17,0,0,0.0,0.0,0.0,0.5
14,14,77,77,1,1,0.012987012987012988,0.5,0.025316455696202535,0.5
15,15,40,40,8,3,0.2,0.7272727272727273,0.3137254901960785,0.5625
16,16,29,29,5,15,0.1724137931034483,0.25,0.20408163265306123,0.3275862068965517
17,total,1043,1043,313,196,,,,
18,,,,,Micro avg.,0.3000958772770853,0.6149312377210217,0.40335051546391754,0.5560882070949185
19,,,,,Macro avg.,0.20160736590636041,0.546523862217578,0.25320028563900565,0.5365671625414301
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,2,0.0,0.0,0.0,0.0
1,1,30,30,16,7,0.5333333333333333,0.6956521739130435,0.6037735849056605,0.65
2,2,288,288,270,269,0.9375,0.5009276437847866,0.652962515114873,0.5017361111111112
3,3,87,87,77,56,0.8850574712643678,0.5789473684210527,0.7000000000000001,0.6206896551724138
4,4,94,94,45,28,0.4787234042553192,0.6164383561643836,0.5389221556886228,0.5904255319148937
5,5,62,62,43,55,0.6935483870967742,0.4387755102040816,0.5375,0.4032258064516129
6,6,63,63,55,32,0.873015873015873,0.632183908045977,0.7333333333333333,0.6825396825396826
7,7,17,17,13,12,0.7647058823529411,0.52,0.6190476190476191,0.5294117647058824
8,8,65,65,40,30,0.6153846153846154,0.5714285714285714,0.5925925925925927,0.5769230769230769
9,9,31,31,28,24,0.9032258064516129,0.5384615384615384,0.6746987951807228,0.5645161290322581
10,10,57,57,48,28,0.8421052631578947,0.631578947368421,0.7218045112781954,0.6754385964912281
11,11,48,48,47,47,0.9791666666666666,0.5,0.6619718309859155,0.5
12,12,36,36,24,25,0.6666666666666666,0.4897959183673469,0.5647058823529412,0.4861111111111111
13,13,17,17,5,5,0.29411764705882354,0.5,0.37037037037037035,0.5
14,14,77,77,54,35,0.7012987012987013,0.6067415730337079,0.6506024096385542,0.6233766233766234
15,15,40,40,18,18,0.45,0.5,0.4736842105263158,0.5
16,16,29,29,27,28,0.9310344827586207,0.4909090909090909,0.6428571428571428,0.4827586206896552
17,total,1043,1043,810,701,,,,
18,,,,,Micro avg.,0.7766059443911792,0.5360688285903376,0.634299138606108,0.5522531160115053
19,,,,,Macro avg.,0.6793461294566007,0.5183435647118824,0.5728721737572269,0.5227736887952675
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,2,0,0.06666666666666667,1.0,0.125,0.5333333333333333
2,2,288,288,208,195,0.7222222222222222,0.5161290322580645,0.602026049204052,0.5225694444444444
3,3,87,87,40,20,0.45977011494252873,0.6666666666666666,0.5442176870748299,0.6149425287356322
4,4,94,94,4,0,0.0425531914893617,1.0,0.08163265306122448,0.5212765957446809
5,5,62,62,27,39,0.43548387096774194,0.4090909090909091,0.421875,0.4032258064516129
6,6,63,63,47,9,0.746031746031746,0.8392857142857143,0.7899159663865546,0.8015873015873016
7,7,17,17,11,7,0.6470588235294118,0.6111111111111112,0.6285714285714287,0.6176470588235294
8,8,65,65,16,14,0.24615384615384617,0.5333333333333333,0.3368421052631579,0.5153846153846153
9,9,31,31,9,7,0.2903225806451613,0.5625,0.3829787234042554,0.532258064516129
10,10,57,57,30,12,0.5263157894736842,0.7142857142857143,0.6060606060606061,0.6578947368421053
11,11,48,48,41,43,0.8541666666666666,0.4880952380952381,0.6212121212121212,0.4791666666666667
12,12,36,36,3,4,0.08333333333333333,0.42857142857142855,0.13953488372093023,0.4861111111111111
13,13,17,17,0,2,0.0,0.0,0.0,0.4411764705882353
14,14,77,77,11,6,0.14285714285714285,0.6470588235294118,0.23404255319148934,0.5324675324675324
15,15,40,40,2,9,0.05,0.18181818181818182,0.07843137254901962,0.4125
16,16,29,29,14,19,0.4827586206896552,0.42424242424242425,0.45161290322580644,0.41379310344827586
17,total,1043,1043,465,386,,,,
18,,,,,Micro avg.,0.4458293384467881,0.5464159811985899,0.4910242872228089,0.5378715244487057
19,,,,,Macro avg.,0.34092321268642173,0.5307169751345998,0.3555267089956162,0.5285490805967767
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,83,61,0.2881944444444444,0.5763888888888888,0.3842592592592592,0.5381944444444444
3,3,87,87,1,1,0.011494252873563218,0.5,0.02247191011235955,0.5
4,4,94,94,0,0,0.0,0.0,0.0,0.5
5,5,62,62,18,13,0.2903225806451613,0.5806451612903226,0.3870967741935484,0.5403225806451613
6,6,63,63,40,3,0.6349206349206349,0.9302325581395349,0.7547169811320754,0.7936507936507936
7,7,17,17,6,3,0.35294117647058826,0.6666666666666666,0.46153846153846156,0.5882352941176471
8,8,65,65,3,4,0.046153846153846156,0.42857142857142855,0.08333333333333333,0.49230769230769234
9,9,31,31,0,1,0.0,0.0,0.0,0.4838709677419355
10,10,57,57,11,5,0.19298245614035087,0.6875,0.30136986301369867,0.5526315789473685
11,11,48,48,33,28,0.6875,0.5409836065573771,0.6055045871559634,0.5520833333333334
12,12,36,36,0,0,0.0,0.0,0.0,0.5
13,13,17,17,0,0,0.0,0.0,0.0,0.5
14,14,77,77,3,1,0.03896103896103896,0.75,0.07407407407407407,0.512987012987013
15,15,40,40,0,1,0.0,0.0,0.0,0.4875
16,16,29,29,8,9,0.27586206896551724,0.47058823529411764,0.34782608695652173,0.4827586206896552
17,total,1043,1043,206,130,,,,
18,,,,,Micro avg.,0.1975071907957814,0.6130952380952381,0.2987672226250907,0.5364333652924257
19,,,,,Macro avg.,0.16584308821030266,0.3606809732593139,0.20130537239819385,0.5308554305214731
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,0,0,0.0,0.0,0.0,0.5
2,2,288,288,7,5,0.024305555555555556,0.5833333333333334,0.04666666666666666,0.5034722222222222
3,3,87,87,0,0,0.0,0.0,0.0,0.5
4,4,94,94,0,0,0.0,0.0,0.0,0.5
5,5,62,62,2,1,0.03225806451612903,0.6666666666666666,0.06153846153846154,0.5080645161290323
6,6,63,63,16,0,0.25396825396825395,1.0,0.4050632911392405,0.626984126984127
7,7,17,17,2,1,0.11764705882352941,0.6666666666666666,0.2,0.5294117647058824
8,8,65,65,0,1,0.0,0.0,0.0,0.49230769230769234
9,9,31,31,0,0,0.0,0.0,0.0,0.5
10,10,57,57,0,0,0.0,0.0,0.0,0.5
11,11,48,48,10,1,0.20833333333333334,0.9090909090909091,0.3389830508474576,0.59375
12,12,36,36,0,0,0.0,0.0,0.0,0.5
13,13,17,17,0,0,0.0,0.0,0.0,0.5
14,14,77,77,1,0,0.012987012987012988,1.0,0.025641025641025647,0.5064935064935064
15,15,40,40,0,0,0.0,0.0,0.0,0.5
16,16,29,29,2,2,0.06896551724137931,0.5,0.1212121212121212,0.5
17,total,1043,1043,40,11,,,,
18,,,,,Micro avg.,0.038350910834132314,0.7843137254901961,0.07312614259597806,0.513902205177373
19,,,,,Macro avg.,0.042262635083834925,0.31327985739750447,0.07053556570852783,0.5153225781672037
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,20,11,0.6666666666666666,0.6451612903225806,0.6557377049180327,0.65
2,2,288,288,287,287,0.9965277777777778,0.5,0.665893271461717,0.5
3,3,87,87,79,64,0.9080459770114943,0.5524475524475524,0.6869565217391305,0.5862068965517241
4,4,94,94,66,47,0.7021276595744681,0.584070796460177,0.6376811594202898,0.601063829787234
5,5,62,62,50,58,0.8064516129032258,0.46296296296296297,0.5882352941176471,0.43548387096774194
6,6,63,63,59,43,0.9365079365079365,0.5784313725490197,0.7151515151515152,0.626984126984127
7,7,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
8,8,65,65,44,33,0.676923076923077,0.5714285714285714,0.619718309859155,0.5846153846153846
9,9,31,31,31,31,1.0,0.5,0.6666666666666666,0.5
10,10,57,57,50,31,0.8771929824561403,0.6172839506172839,0.7246376811594202,0.6666666666666666
11,11,48,48,48,48,1.0,0.5,0.6666666666666666,0.5
12,12,36,36,27,21,0.75,0.5625,0.6428571428571429,0.5833333333333334
13,13,17,17,8,3,0.47058823529411764,0.7272727272727273,0.5714285714285714,0.6470588235294118
14,14,77,77,58,38,0.7532467532467533,0.6041666666666666,0.6705202312138729,0.6298701298701299
15,15,40,40,34,33,0.85,0.5074626865671642,0.6355140186915889,0.5125
16,16,29,29,28,29,0.9655172413793104,0.49122807017543857,0.6511627906976745,0.4827586206896552
17,total,1043,1043,906,793,,,,
18,,,,,Micro avg.,0.8686481303930969,0.533254855797528,0.660831509846827,0.5541706615532119
19,,,,,Macro avg.,0.8118219053134825,0.5541497458663652,0.645029071336221,0.5609384381000759
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,1,1,0.5,0.5,0.5,0.5
1,1,30,30,14,8,0.4666666666666667,0.6363636363636364,0.5384615384615385,0.6
2,2,288,288,287,284,0.9965277777777778,0.5026269702276708,0.6682188591385332,0.5052083333333334
3,3,87,87,78,58,0.896551724137931,0.5735294117647058,0.6995515695067264,0.6149425287356322
4,4,94,94,52,36,0.5531914893617021,0.5909090909090909,0.5714285714285714,0.5851063829787234
5,5,62,62,40,49,0.6451612903225806,0.449438202247191,0.5298013245033112,0.4274193548387097
6,6,63,63,58,28,0.9206349206349206,0.6744186046511628,0.7785234899328859,0.7380952380952381
7,7,17,17,15,13,0.8823529411764706,0.5357142857142857,0.6666666666666667,0.5588235294117647
8,8,65,65,38,27,0.5846153846153846,0.5846153846153846,0.5846153846153846,0.5846153846153846
9,9,31,31,29,26,0.9354838709677419,0.5272727272727272,0.6744186046511628,0.5483870967741935
10,10,57,57,47,22,0.8245614035087719,0.6811594202898551,0.746031746031746,0.7192982456140351
11,11,48,48,48,48,1.0,0.5,0.6666666666666666,0.5
12,12,36,36,21,15,0.5833333333333334,0.5833333333333334,0.5833333333333334,0.5833333333333334
13,13,17,17,7,2,0.4117647058823529,0.7777777777777778,0.5384615384615384,0.6470588235294118
14,14,77,77,39,18,0.5064935064935064,0.6842105263157895,0.5820895522388059,0.6363636363636364
15,15,40,40,31,27,0.775,0.5344827586206896,0.6326530612244898,0.55
16,16,29,29,26,29,0.896551724137931,0.4727272727272727,0.6190476190476191,0.4482758620689655
17,total,1043,1043,831,691,,,,
18,,,,,Micro avg.,0.7967401725790988,0.5459921156373193,0.647953216374269,0.5671140939597316
19,,,,,Macro avg.,0.7281700434715924,0.5769752589900338,0.6223511485828812,0.5733486911583743
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,10,4,0.3333333333333333,0.7142857142857143,0.4545454545454545,0.6
2,2,288,288,279,259,0.96875,0.5185873605947955,0.6755447941888619,0.5347222222222222
3,3,87,87,72,48,0.8275862068965517,0.6,0.6956521739130435,0.6379310344827587
4,4,94,94,38,22,0.40425531914893614,0.6333333333333333,0.49350649350649345,0.5851063829787234
5,5,62,62,31,41,0.5,0.4305555555555556,0.4626865671641791,0.41935483870967744
6,6,63,63,52,16,0.8253968253968254,0.7647058823529411,0.7938931297709922,0.7857142857142857
7,7,17,17,14,11,0.8235294117647058,0.56,0.6666666666666666,0.5882352941176471
8,8,65,65,25,19,0.38461538461538464,0.5681818181818182,0.45871559633027525,0.5461538461538461
9,9,31,31,27,20,0.8709677419354839,0.574468085106383,0.6923076923076925,0.6129032258064516
10,10,57,57,43,16,0.7543859649122807,0.7288135593220338,0.7413793103448276,0.7368421052631579
11,11,48,48,48,47,1.0,0.5052631578947369,0.6713286713286714,0.5104166666666666
12,12,36,36,18,8,0.5,0.6923076923076923,0.5806451612903226,0.6388888888888888
13,13,17,17,3,1,0.17647058823529413,0.75,0.2857142857142857,0.5588235294117647
14,14,77,77,22,9,0.2857142857142857,0.7096774193548387,0.40740740740740744,0.5844155844155844
15,15,40,40,26,22,0.65,0.5416666666666666,0.5909090909090908,0.55
16,16,29,29,24,28,0.8275862068965517,0.46153846153846156,0.5925925925925927,0.43103448275862066
17,total,1043,1043,732,571,,,,
18,,,,,Micro avg.,0.7018216682646213,0.5617805065234075,0.6240409207161125,0.5771812080536913
19,,,,,Macro avg.,0.5960347805205667,0.573728512146763,0.5449114757635799,0.5776789639758997
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
0,0,2,2,0,0,0.0,0.0,0.0,0.5
1,1,30,30,4,0,0.13333333333333333,1.0,0.23529411764705882,0.5666666666666667
2,2,288,288,260,211,0.9027777777777778,0.5520169851380042,0.6851119894598156,0.5850694444444444
3,3,87,87,60,32,0.6896551724137931,0.6521739130434783,0.670391061452514,0.6609195402298851
4,4,94,94,23,8,0.24468085106382978,0.7419354838709677,0.368,0.5797872340425532
5,5,62,62,22,30,0.3548387096774194,0.4230769230769231,0.3859649122807018,0.43548387096774194
6,6,63,63,45,10,0.7142857142857143,0.8181818181818182,0.7627118644067797,0.7777777777777778
7,7,17,17,11,10,0.6470588235294118,0.5238095238095238,0.5789473684210527,0.5294117647058824
8,8,65,65,9,9,0.13846153846153847,0.5,0.2168674698795181,0.5
9,9,31,31,23,12,0.7419354838709677,0.6571428571428571,0.6969696969696969,0.6774193548387096
10,10,57,57,23,13,0.40350877192982454,0.6388888888888888,0.49462365591397844,0.5877192982456141
11,11,48,48,46,41,0.9583333333333334,0.5287356321839081,0.6814814814814815,0.5520833333333334
12,12,36,36,7,2,0.19444444444444445,0.7777777777777778,0.3111111111111111,0.5694444444444444
13,13,17,17,0,1,0.0,0.0,0.0,0.47058823529411764
14,14,77,77,7,2,0.09090909090909091,0.7777777777777778,0.1627906976744186,0.5324675324675324
15,15,40,40,19,13,0.475,0.59375,0.5277777777777778,0.575
16,16,29,29,21,25,0.7241379310344828,0.45652173913043476,0.5599999999999999,0.43103448275862066
17,total,1043,1043,580,419,,,,
18,,,,,Micro avg.,0.5560882070949185,0.5805805805805806,0.5680705190989226,0.5771812080536913
19,,,,,Macro avg.,0.436080057415586,0.5671640776483741,0.43164960026328847,0.5606395870716072