gorkaartola commited on
Commit
1852566
1 Parent(s): 106a138

Delete report-Model-0_Queries-2_Prompt-2_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv

Browse files
report-Model-0_Queries-2_Prompt-2_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv DELETED
@@ -1,189 +0,0 @@
1
- argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
2
- 0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
3
- 1,1,30,30,6,2,0.2,0.75,0.31578947368421056,0.5666666666666667
4
- 2,2,288,288,144,106,0.5,0.576,0.5353159851301115,0.5659722222222222
5
- 3,3,87,87,49,15,0.5632183908045977,0.765625,0.6490066225165564,0.6954022988505747
6
- 4,4,94,94,47,30,0.5,0.6103896103896104,0.5497076023391813,0.5904255319148937
7
- 5,5,62,62,15,13,0.24193548387096775,0.5357142857142857,0.33333333333333337,0.5161290322580645
8
- 6,6,63,63,33,1,0.5238095238095238,0.9705882352941176,0.6804123711340205,0.753968253968254
9
- 7,7,17,17,7,2,0.4117647058823529,0.7777777777777778,0.5384615384615384,0.6470588235294118
10
- 8,8,65,65,15,10,0.23076923076923078,0.6,0.33333333333333337,0.5384615384615384
11
- 9,9,31,31,3,1,0.0967741935483871,0.75,0.1714285714285714,0.532258064516129
12
- 10,10,57,57,30,16,0.5263157894736842,0.6521739130434783,0.5825242718446602,0.6228070175438597
13
- 11,11,48,48,4,1,0.08333333333333333,0.8,0.1509433962264151,0.53125
14
- 12,12,36,36,12,10,0.3333333333333333,0.5454545454545454,0.41379310344827586,0.5277777777777778
15
- 13,13,17,17,12,1,0.7058823529411765,0.9230769230769231,0.8000000000000002,0.8235294117647058
16
- 14,14,77,77,32,14,0.4155844155844156,0.6956521739130435,0.5203252032520326,0.6168831168831169
17
- 15,15,40,40,12,16,0.3,0.42857142857142855,0.3529411764705882,0.45
18
- 16,16,29,29,6,5,0.20689655172413793,0.5454545454545454,0.3,0.5172413793103449
19
- 17,total,1043,1043,428,243,,,,
20
- 18,,,,,Micro avg.,0.4103547459252157,0.6378539493293591,0.4994165694282381,0.588686481303931
21
- 19,,,,,Macro avg.,0.37291866500442006,0.7015575552170444,0.46435192054526436,0.6026959491569152
22
- threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
23
- 0,0,2,2,2,1,1.0,0.6666666666666666,0.8,0.75
24
- 1,1,30,30,27,18,0.9,0.6,0.7200000000000001,0.65
25
- 2,2,288,288,280,268,0.9722222222222222,0.5109489051094891,0.6698564593301435,0.5208333333333334
26
- 3,3,87,87,86,73,0.9885057471264368,0.5408805031446541,0.6991869918699186,0.5747126436781609
27
- 4,4,94,94,86,82,0.9148936170212766,0.5119047619047619,0.6564885496183206,0.5212765957446809
28
- 5,5,62,62,41,51,0.6612903225806451,0.44565217391304346,0.5324675324675325,0.41935483870967744
29
- 6,6,63,63,60,36,0.9523809523809523,0.625,0.7547169811320755,0.6904761904761905
30
- 7,7,17,17,15,9,0.8823529411764706,0.625,0.7317073170731708,0.6764705882352942
31
- 8,8,65,65,55,52,0.8461538461538461,0.514018691588785,0.6395348837209301,0.5230769230769231
32
- 9,9,31,31,30,27,0.967741935483871,0.5263157894736842,0.6818181818181819,0.5483870967741935
33
- 10,10,57,57,52,45,0.9122807017543859,0.5360824742268041,0.6753246753246752,0.5614035087719298
34
- 11,11,48,48,46,44,0.9583333333333334,0.5111111111111111,0.6666666666666666,0.5208333333333334
35
- 12,12,36,36,31,32,0.8611111111111112,0.49206349206349204,0.6262626262626263,0.4861111111111111
36
- 13,13,17,17,17,8,1.0,0.68,0.8095238095238095,0.7647058823529411
37
- 14,14,77,77,66,54,0.8571428571428571,0.55,0.6700507614213198,0.577922077922078
38
- 15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
39
- 16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
40
- 17,total,1043,1043,963,869,,,,
41
- 18,,,,,Micro avg.,0.9232981783317353,0.5256550218340611,0.6699130434782609,0.5450623202301055
42
- 19,,,,,Macro avg.,0.9220240933816124,0.5491555628942641,0.6862905158566296,0.5756214190305793
43
- threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
44
- 0,0,2,2,2,0,1.0,1.0,1.0,1.0
45
- 1,1,30,30,26,17,0.8666666666666667,0.6046511627906976,0.7123287671232877,0.65
46
- 2,2,288,288,268,258,0.9305555555555556,0.5095057034220533,0.6584766584766586,0.5173611111111112
47
- 3,3,87,87,80,67,0.9195402298850575,0.54421768707483,0.6837606837606839,0.5747126436781609
48
- 4,4,94,94,78,76,0.8297872340425532,0.5064935064935064,0.6290322580645161,0.5106382978723404
49
- 5,5,62,62,35,40,0.5645161290322581,0.4666666666666667,0.5109489051094891,0.4596774193548387
50
- 6,6,63,63,60,30,0.9523809523809523,0.6666666666666666,0.7843137254901961,0.7380952380952381
51
- 7,7,17,17,13,9,0.7647058823529411,0.5909090909090909,0.6666666666666667,0.6176470588235294
52
- 8,8,65,65,45,45,0.6923076923076923,0.5,0.5806451612903226,0.5
53
- 9,9,31,31,25,22,0.8064516129032258,0.5319148936170213,0.641025641025641,0.5483870967741935
54
- 10,10,57,57,49,40,0.8596491228070176,0.550561797752809,0.6712328767123288,0.5789473684210527
55
- 11,11,48,48,45,42,0.9375,0.5172413793103449,0.6666666666666667,0.53125
56
- 12,12,36,36,28,28,0.7777777777777778,0.5,0.6086956521739131,0.5
57
- 13,13,17,17,15,7,0.8823529411764706,0.6818181818181818,0.7692307692307693,0.7352941176470589
58
- 14,14,77,77,64,45,0.8311688311688312,0.5871559633027523,0.6881720430107527,0.6233766233766234
59
- 15,15,40,40,40,39,1.0,0.5063291139240507,0.6722689075630253,0.5125
60
- 16,16,29,29,29,28,1.0,0.5087719298245614,0.6744186046511628,0.5172413793103449
61
- 17,total,1043,1043,902,793,,,,
62
- 18,,,,,Micro avg.,0.8648130393096836,0.5321533923303835,0.6588750913075236,0.5522531160115053
63
- 19,,,,,Macro avg.,0.8597270957680588,0.5748766907984255,0.6834049404127106,0.5950075502626173
64
- threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
65
- 0,0,2,2,2,0,1.0,1.0,1.0,1.0
66
- 1,1,30,30,22,14,0.7333333333333333,0.6111111111111112,0.6666666666666666,0.6333333333333333
67
- 2,2,288,288,263,232,0.9131944444444444,0.5313131313131313,0.6717752234993615,0.5538194444444444
68
- 3,3,87,87,75,55,0.8620689655172413,0.5769230769230769,0.6912442396313364,0.6149425287356322
69
- 4,4,94,94,70,72,0.7446808510638298,0.49295774647887325,0.5932203389830508,0.48936170212765956
70
- 5,5,62,62,29,33,0.46774193548387094,0.46774193548387094,0.46774193548387094,0.46774193548387094
71
- 6,6,63,63,53,24,0.8412698412698413,0.6883116883116883,0.7571428571428571,0.7301587301587301
72
- 7,7,17,17,13,6,0.7647058823529411,0.6842105263157895,0.7222222222222222,0.7058823529411765
73
- 8,8,65,65,40,34,0.6153846153846154,0.5405405405405406,0.5755395683453237,0.5461538461538461
74
- 9,9,31,31,23,19,0.7419354838709677,0.5476190476190477,0.6301369863013699,0.5645161290322581
75
- 10,10,57,57,47,33,0.8245614035087719,0.5875,0.6861313868613139,0.6228070175438597
76
- 11,11,48,48,41,38,0.8541666666666666,0.5189873417721519,0.6456692913385826,0.53125
77
- 12,12,36,36,24,26,0.6666666666666666,0.48,0.5581395348837209,0.4722222222222222
78
- 13,13,17,17,15,6,0.8823529411764706,0.7142857142857143,0.7894736842105262,0.7647058823529411
79
- 14,14,77,77,63,38,0.8181818181818182,0.6237623762376238,0.7078651685393259,0.6623376623376623
80
- 15,15,40,40,39,39,0.975,0.5,0.6610169491525423,0.5
81
- 16,16,29,29,24,28,0.8275862068965517,0.46153846153846156,0.5925925925925927,0.43103448275862066
82
- 17,total,1043,1043,843,697,,,,
83
- 18,,,,,Micro avg.,0.8082454458293384,0.5474025974025974,0.6527293844367016,0.5699904122722914
84
- 19,,,,,Macro avg.,0.7960488856363548,0.5898119234077106,0.6715634497561567,0.6053098393897799
85
- threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
86
- 0,0,2,2,2,0,1.0,1.0,1.0,1.0
87
- 1,1,30,30,19,8,0.6333333333333333,0.7037037037037037,0.6666666666666667,0.6833333333333333
88
- 2,2,288,288,242,202,0.8402777777777778,0.545045045045045,0.6612021857923498,0.5694444444444444
89
- 3,3,87,87,66,38,0.7586206896551724,0.6346153846153846,0.6910994764397905,0.6609195402298851
90
- 4,4,94,94,64,59,0.6808510638297872,0.5203252032520326,0.5898617511520738,0.526595744680851
91
- 5,5,62,62,23,25,0.3709677419354839,0.4791666666666667,0.4181818181818182,0.4838709677419355
92
- 6,6,63,63,46,18,0.7301587301587301,0.71875,0.7244094488188977,0.7222222222222222
93
- 7,7,17,17,10,6,0.5882352941176471,0.625,0.6060606060606061,0.6176470588235294
94
- 8,8,65,65,32,24,0.49230769230769234,0.5714285714285714,0.5289256198347108,0.5615384615384615
95
- 9,9,31,31,15,8,0.4838709677419355,0.6521739130434783,0.5555555555555556,0.6129032258064516
96
- 10,10,57,57,45,27,0.7894736842105263,0.625,0.6976744186046512,0.6578947368421053
97
- 11,11,48,48,34,27,0.7083333333333334,0.5573770491803278,0.6238532110091743,0.5729166666666666
98
- 12,12,36,36,22,20,0.6111111111111112,0.5238095238095238,0.5641025641025642,0.5277777777777778
99
- 13,13,17,17,14,5,0.8235294117647058,0.7368421052631579,0.7777777777777778,0.7647058823529411
100
- 14,14,77,77,59,35,0.7662337662337663,0.6276595744680851,0.6900584795321637,0.6558441558441559
101
- 15,15,40,40,34,32,0.85,0.5151515151515151,0.6415094339622641,0.525
102
- 16,16,29,29,24,28,0.8275862068965517,0.46153846153846156,0.5925925925925927,0.43103448275862066
103
- 17,total,1043,1043,751,562,,,,
104
- 18,,,,,Micro avg.,0.7200383509108341,0.571972581873572,0.637521222410866,0.5906040268456376
105
- 19,,,,,Macro avg.,0.7032288708475031,0.617505101009762,0.6487959768284506,0.6219793353566695
106
- topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
107
- 0,0,2,2,2,0,1.0,1.0,1.0,1.0
108
- 1,1,30,30,26,18,0.8666666666666667,0.5909090909090909,0.7027027027027029,0.6333333333333333
109
- 2,2,288,288,276,267,0.9583333333333334,0.5082872928176796,0.6642599277978339,0.515625
110
- 3,3,87,87,77,66,0.8850574712643678,0.5384615384615384,0.6695652173913044,0.5632183908045977
111
- 4,4,94,94,78,81,0.8297872340425532,0.49056603773584906,0.616600790513834,0.48404255319148937
112
- 5,5,62,62,29,33,0.46774193548387094,0.46774193548387094,0.46774193548387094,0.46774193548387094
113
- 6,6,63,63,57,32,0.9047619047619048,0.6404494382022472,0.7500000000000001,0.6984126984126984
114
- 7,7,17,17,13,12,0.7647058823529411,0.52,0.6190476190476191,0.5294117647058824
115
- 8,8,65,65,55,47,0.8461538461538461,0.5392156862745098,0.6586826347305389,0.5615384615384615
116
- 9,9,31,31,28,25,0.9032258064516129,0.5283018867924528,0.6666666666666666,0.5483870967741935
117
- 10,10,57,57,50,40,0.8771929824561403,0.5555555555555556,0.6802721088435374,0.5877192982456141
118
- 11,11,48,48,45,41,0.9375,0.5232558139534884,0.6716417910447762,0.5416666666666666
119
- 12,12,36,36,31,30,0.8611111111111112,0.5081967213114754,0.6391752577319588,0.5138888888888888
120
- 13,13,17,17,17,6,1.0,0.7391304347826086,0.85,0.8235294117647058
121
- 14,14,77,77,67,43,0.8701298701298701,0.6090909090909091,0.716577540106952,0.6558441558441559
122
- 15,15,40,40,40,40,1.0,0.5,0.6666666666666666,0.5
123
- 16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
124
- 17,total,1043,1043,920,810,,,,
125
- 18,,,,,Micro avg.,0.8820709491850431,0.5317919075144508,0.6635412910205553,0.552732502396932
126
- 19,,,,,Macro avg.,0.8807275320122482,0.5740683730218398,0.6886039720820545,0.5955505679796799
127
- topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
128
- 0,0,2,2,2,0,1.0,1.0,1.0,1.0
129
- 1,1,30,30,23,15,0.7666666666666667,0.6052631578947368,0.676470588235294,0.6333333333333333
130
- 2,2,288,288,271,250,0.9409722222222222,0.5201535508637236,0.6699629171817058,0.5364583333333334
131
- 3,3,87,87,74,61,0.8505747126436781,0.5481481481481482,0.6666666666666666,0.5747126436781609
132
- 4,4,94,94,73,75,0.776595744680851,0.49324324324324326,0.6033057851239669,0.48936170212765956
133
- 5,5,62,62,26,29,0.41935483870967744,0.4727272727272727,0.4444444444444444,0.47580645161290325
134
- 6,6,63,63,55,24,0.873015873015873,0.6962025316455697,0.7746478873239436,0.746031746031746
135
- 7,7,17,17,12,9,0.7058823529411765,0.5714285714285714,0.6315789473684211,0.5882352941176471
136
- 8,8,65,65,47,35,0.7230769230769231,0.573170731707317,0.6394557823129251,0.5923076923076923
137
- 9,9,31,31,26,19,0.8387096774193549,0.5777777777777777,0.6842105263157895,0.6129032258064516
138
- 10,10,57,57,48,36,0.8421052631578947,0.5714285714285714,0.6808510638297872,0.6052631578947368
139
- 11,11,48,48,38,35,0.7916666666666666,0.5205479452054794,0.628099173553719,0.53125
140
- 12,12,36,36,28,26,0.7777777777777778,0.5185185185185185,0.6222222222222222,0.5277777777777778
141
- 13,13,17,17,16,6,0.9411764705882353,0.7272727272727273,0.8205128205128205,0.7941176470588235
142
- 14,14,77,77,60,38,0.7792207792207793,0.6122448979591837,0.6857142857142857,0.6428571428571429
143
- 15,15,40,40,40,38,1.0,0.5128205128205128,0.6779661016949152,0.525
144
- 16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
145
- 17,total,1043,1043,868,725,,,,
146
- 18,,,,,Micro avg.,0.8322147651006712,0.5448838669177652,0.6585735963581184,0.5685522531160115
147
- 19,,,,,Macro avg.,0.8251056452228104,0.5894675387436089,0.6807515223039748,0.6103185969374947
148
- topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
149
- 0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
150
- 1,1,30,30,19,8,0.6333333333333333,0.7037037037037037,0.6666666666666667,0.6833333333333333
151
- 2,2,288,288,260,223,0.9027777777777778,0.5383022774327122,0.6744487678339819,0.5642361111111112
152
- 3,3,87,87,69,52,0.7931034482758621,0.5702479338842975,0.6634615384615384,0.5977011494252874
153
- 4,4,94,94,68,66,0.723404255319149,0.5074626865671642,0.5964912280701754,0.5106382978723404
154
- 5,5,62,62,22,28,0.3548387096774194,0.44,0.3928571428571428,0.45161290322580644
155
- 6,6,63,63,51,18,0.8095238095238095,0.7391304347826086,0.7727272727272727,0.7619047619047619
156
- 7,7,17,17,11,7,0.6470588235294118,0.6111111111111112,0.6285714285714287,0.6176470588235294
157
- 8,8,65,65,36,25,0.5538461538461539,0.5901639344262295,0.5714285714285714,0.5846153846153846
158
- 9,9,31,31,22,14,0.7096774193548387,0.6111111111111112,0.6567164179104478,0.6290322580645161
159
- 10,10,57,57,45,33,0.7894736842105263,0.5769230769230769,0.6666666666666666,0.6052631578947368
160
- 11,11,48,48,34,32,0.7083333333333334,0.5151515151515151,0.5964912280701754,0.5208333333333334
161
- 12,12,36,36,22,21,0.6111111111111112,0.5116279069767442,0.5569620253164557,0.5138888888888888
162
- 13,13,17,17,15,5,0.8823529411764706,0.75,0.8108108108108107,0.7941176470588235
163
- 14,14,77,77,57,32,0.7402597402597403,0.6404494382022472,0.6867469879518073,0.6623376623376623
164
- 15,15,40,40,39,35,0.975,0.527027027027027,0.6842105263157895,0.55
165
- 16,16,29,29,26,27,0.896551724137931,0.49056603773584906,0.6341463414634146,0.4827586206896552
166
- 17,total,1043,1043,797,626,,,,
167
- 18,,,,,Micro avg.,0.7641418983700863,0.5600843288826423,0.6463909164639091,0.5819750719079578
168
- 19,,,,,Macro avg.,0.7194497802862864,0.6072340114726703,0.6427100169287655,0.6047012099164218
169
- topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
170
- 0,0,2,2,1,0,0.5,1.0,0.6666666666666666,0.75
171
- 1,1,30,30,13,5,0.43333333333333335,0.7222222222222222,0.5416666666666666,0.6333333333333333
172
- 2,2,288,288,234,186,0.8125,0.5571428571428572,0.6610169491525424,0.5833333333333334
173
- 3,3,87,87,63,32,0.7241379310344828,0.6631578947368421,0.6923076923076923,0.6781609195402298
174
- 4,4,94,94,65,59,0.6914893617021277,0.5241935483870968,0.5963302752293578,0.5319148936170213
175
- 5,5,62,62,20,25,0.3225806451612903,0.4444444444444444,0.3738317757009346,0.4596774193548387
176
- 6,6,63,63,46,9,0.7301587301587301,0.8363636363636363,0.7796610169491525,0.7936507936507936
177
- 7,7,17,17,8,4,0.47058823529411764,0.6666666666666666,0.5517241379310345,0.6176470588235294
178
- 8,8,65,65,27,16,0.4153846153846154,0.627906976744186,0.5,0.5846153846153846
179
- 9,9,31,31,15,6,0.4838709677419355,0.7142857142857143,0.5769230769230769,0.6451612903225806
180
- 10,10,57,57,39,27,0.6842105263157895,0.5909090909090909,0.6341463414634148,0.6052631578947368
181
- 11,11,48,48,23,26,0.4791666666666667,0.46938775510204084,0.4742268041237114,0.46875
182
- 12,12,36,36,17,17,0.4722222222222222,0.5,0.4857142857142857,0.5
183
- 13,13,17,17,15,5,0.8823529411764706,0.75,0.8108108108108107,0.7941176470588235
184
- 14,14,77,77,49,24,0.6363636363636364,0.6712328767123288,0.6533333333333333,0.6623376623376623
185
- 15,15,40,40,36,31,0.9,0.5373134328358209,0.6728971962616822,0.5625
186
- 16,16,29,29,19,24,0.6551724137931034,0.4418604651162791,0.5277777777777777,0.41379310344827586
187
- 17,total,1043,1043,690,496,,,,
188
- 18,,,,,Micro avg.,0.6615532118887824,0.581787521079258,0.6191117092866757,0.5930009587727708
189
- 19,,,,,Macro avg.,0.6055018956675602,0.6304169165687781,0.5999432239418904,0.6049562351370908