Spaces:
Runtime error
Runtime error
Commit
•
f0612b0
1
Parent(s):
3b2c042
Upload report-Model-1_Queries-3_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
Browse files
Reports/report-Model-1_Queries-3_Prompt-0_Strategies-argmax-threshold0.05-threshold0.25-threshold0.5-threshold0.75-topk9-topk7-topk5-topk3.csv
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
argmax_max,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
2 |
+
0,0,2,2,0,0,0,0.0,0.0,0.5
|
3 |
+
1,1,30,30,0,0,0,0.0,0.0,0.5
|
4 |
+
2,2,288,288,288,287,1,0.5008695652173913,0.6674391657010429,0.5017361111111112
|
5 |
+
3,3,87,87,0,0,0,0.0,0.0,0.5
|
6 |
+
4,4,94,94,0,0,0,0.0,0.0,0.5
|
7 |
+
5,5,62,62,0,0,0,0.0,0.0,0.5
|
8 |
+
6,6,63,63,0,0,0,0.0,0.0,0.5
|
9 |
+
7,7,17,17,0,0,0,0.0,0.0,0.5
|
10 |
+
8,8,65,65,0,0,0,0.0,0.0,0.5
|
11 |
+
9,9,31,31,0,0,0,0.0,0.0,0.5
|
12 |
+
10,10,57,57,0,0,0,0.0,0.0,0.5
|
13 |
+
11,11,48,48,0,0,0,0.0,0.0,0.5
|
14 |
+
12,12,36,36,0,0,0,0.0,0.0,0.5
|
15 |
+
13,13,17,17,0,0,0,0.0,0.0,0.5
|
16 |
+
14,14,77,77,0,0,0,0.0,0.0,0.5
|
17 |
+
15,15,40,40,0,0,0,0.0,0.0,0.5
|
18 |
+
16,16,29,29,0,0,0,0.0,0.0,0.5
|
19 |
+
17,total,1043,1043,288,287,,,,
|
20 |
+
18,,,,,Micro avg.,0.27612655800575264,0.5008695652173913,0.3559950556242275,0.5004793863854267
|
21 |
+
19,,,,,Macro avg.,0.058823529411764705,0.02946291560102302,0.039261127394178995,0.5001021241830065
|
22 |
+
threshold-0.05,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
23 |
+
0,0,2,2,2,2,1.0,0.5,0.6666666666666666,0.5
|
24 |
+
1,1,30,30,30,30,1.0,0.5,0.6666666666666666,0.5
|
25 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
26 |
+
3,3,87,87,87,87,1.0,0.5,0.6666666666666666,0.5
|
27 |
+
4,4,94,94,94,91,1.0,0.5081081081081081,0.6738351254480287,0.5159574468085106
|
28 |
+
5,5,62,62,62,62,1.0,0.5,0.6666666666666666,0.5
|
29 |
+
6,6,63,63,63,63,1.0,0.5,0.6666666666666666,0.5
|
30 |
+
7,7,17,17,17,17,1.0,0.5,0.6666666666666666,0.5
|
31 |
+
8,8,65,65,65,65,1.0,0.5,0.6666666666666666,0.5
|
32 |
+
9,9,31,31,31,31,1.0,0.5,0.6666666666666666,0.5
|
33 |
+
10,10,57,57,57,57,1.0,0.5,0.6666666666666666,0.5
|
34 |
+
11,11,48,48,48,48,1.0,0.5,0.6666666666666666,0.5
|
35 |
+
12,12,36,36,36,36,1.0,0.5,0.6666666666666666,0.5
|
36 |
+
13,13,17,17,17,17,1.0,0.5,0.6666666666666666,0.5
|
37 |
+
14,14,77,77,77,77,1.0,0.5,0.6666666666666666,0.5
|
38 |
+
15,15,40,40,39,37,0.975,0.5131578947368421,0.6724137931034483,0.525
|
39 |
+
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
|
40 |
+
17,total,1043,1043,1042,1037,,,,
|
41 |
+
18,,,,,Micro avg.,0.9990412272291467,0.5012025012025012,0.6675208199871876,0.5023969319271333
|
42 |
+
19,,,,,Macro avg.,0.998529411764706,0.5012509413438205,0.6674264069736162,0.5024092615769712
|
43 |
+
threshold-0.25,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
44 |
+
0,0,2,2,1,2,0.5,0.3333333333333333,0.4,0.25
|
45 |
+
1,1,30,30,29,30,0.9666666666666667,0.4915254237288136,0.651685393258427,0.48333333333333334
|
46 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
47 |
+
3,3,87,87,80,80,0.9195402298850575,0.5,0.6477732793522267,0.5
|
48 |
+
4,4,94,94,80,73,0.851063829787234,0.5228758169934641,0.6477732793522267,0.5372340425531915
|
49 |
+
5,5,62,62,58,60,0.9354838709677419,0.4915254237288136,0.6444444444444444,0.4838709677419355
|
50 |
+
6,6,63,63,61,55,0.9682539682539683,0.5258620689655172,0.6815642458100558,0.5476190476190477
|
51 |
+
7,7,17,17,17,17,1.0,0.5,0.6666666666666666,0.5
|
52 |
+
8,8,65,65,65,64,1.0,0.5038759689922481,0.6701030927835051,0.5076923076923077
|
53 |
+
9,9,31,31,30,30,0.967741935483871,0.5,0.6593406593406593,0.5
|
54 |
+
10,10,57,57,57,57,1.0,0.5,0.6666666666666666,0.5
|
55 |
+
11,11,48,48,48,48,1.0,0.5,0.6666666666666666,0.5
|
56 |
+
12,12,36,36,36,36,1.0,0.5,0.6666666666666666,0.5
|
57 |
+
13,13,17,17,17,17,1.0,0.5,0.6666666666666666,0.5
|
58 |
+
14,14,77,77,72,64,0.935064935064935,0.5294117647058824,0.676056338028169,0.551948051948052
|
59 |
+
15,15,40,40,35,34,0.875,0.5072463768115942,0.6422018348623854,0.5125
|
60 |
+
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
|
61 |
+
17,total,1043,1043,1003,984,,,,
|
62 |
+
18,,,,,Micro avg.,0.9616490891658677,0.5047810770005032,0.662046204620462,0.5091083413231065
|
63 |
+
19,,,,,Macro avg.,0.9364009080064397,0.4944503633682157,0.6463299549352215,0.49259986769928626
|
64 |
+
threshold-0.5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
65 |
+
0,0,2,2,0,1,0.0,0.0,0.0,0.25
|
66 |
+
1,1,30,30,25,28,0.8333333333333334,0.4716981132075472,0.6024096385542169,0.45
|
67 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
68 |
+
3,3,87,87,44,41,0.5057471264367817,0.5176470588235295,0.5116279069767442,0.5172413793103449
|
69 |
+
4,4,94,94,52,48,0.5531914893617021,0.52,0.536082474226804,0.5212765957446809
|
70 |
+
5,5,62,62,34,44,0.5483870967741935,0.4358974358974359,0.4857142857142857,0.41935483870967744
|
71 |
+
6,6,63,63,18,12,0.2857142857142857,0.6,0.3870967741935483,0.5476190476190477
|
72 |
+
7,7,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
|
73 |
+
8,8,65,65,63,59,0.9692307692307692,0.5163934426229508,0.6737967914438504,0.5307692307692308
|
74 |
+
9,9,31,31,24,27,0.7741935483870968,0.47058823529411764,0.5853658536585367,0.45161290322580644
|
75 |
+
10,10,57,57,47,52,0.8245614035087719,0.47474747474747475,0.6025641025641025,0.45614035087719296
|
76 |
+
11,11,48,48,47,48,0.9791666666666666,0.49473684210526314,0.6573426573426573,0.4895833333333333
|
77 |
+
12,12,36,36,33,32,0.9166666666666666,0.5076923076923077,0.6534653465346535,0.5138888888888888
|
78 |
+
13,13,17,17,16,17,0.9411764705882353,0.48484848484848486,0.64,0.47058823529411764
|
79 |
+
14,14,77,77,53,38,0.6883116883116883,0.5824175824175825,0.630952380952381,0.5974025974025974
|
80 |
+
15,15,40,40,25,25,0.625,0.5,0.5555555555555556,0.5
|
81 |
+
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
|
82 |
+
17,total,1043,1043,814,804,,,,
|
83 |
+
18,,,,,Micro avg.,0.7804410354745925,0.5030902348578492,0.6118000751597145,0.5047938638542665
|
84 |
+
19,,,,,Macro avg.,0.7285798244452016,0.4760468241126329,0.5601161039833726,0.4849934803459294
|
85 |
+
threshold-0.75,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
86 |
+
0,0,2,2,0,0,0.0,0.0,0.0,0.5
|
87 |
+
1,1,30,30,11,11,0.36666666666666664,0.5,0.423076923076923,0.5
|
88 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
89 |
+
3,3,87,87,6,9,0.06896551724137931,0.4,0.1176470588235294,0.4827586206896552
|
90 |
+
4,4,94,94,17,17,0.18085106382978725,0.5,0.265625,0.5
|
91 |
+
5,5,62,62,13,11,0.20967741935483872,0.5416666666666666,0.3023255813953488,0.5161290322580645
|
92 |
+
6,6,63,63,3,0,0.047619047619047616,1.0,0.0909090909090909,0.5238095238095238
|
93 |
+
7,7,17,17,14,13,0.8235294117647058,0.5185185185185185,0.6363636363636364,0.5294117647058824
|
94 |
+
8,8,65,65,49,46,0.7538461538461538,0.5157894736842106,0.6125,0.5230769230769231
|
95 |
+
9,9,31,31,17,17,0.5483870967741935,0.5,0.5230769230769231,0.5
|
96 |
+
10,10,57,57,29,26,0.5087719298245614,0.5272727272727272,0.5178571428571429,0.5263157894736842
|
97 |
+
11,11,48,48,43,46,0.8958333333333334,0.48314606741573035,0.6277372262773723,0.46875
|
98 |
+
12,12,36,36,22,19,0.6111111111111112,0.5365853658536586,0.5714285714285715,0.5416666666666666
|
99 |
+
13,13,17,17,13,15,0.7647058823529411,0.4642857142857143,0.5777777777777777,0.4411764705882353
|
100 |
+
14,14,77,77,10,11,0.12987012987012986,0.47619047619047616,0.20408163265306123,0.4935064935064935
|
101 |
+
15,15,40,40,10,13,0.25,0.43478260869565216,0.3174603174603175,0.4625
|
102 |
+
16,16,29,29,28,29,0.9655172413793104,0.49122807017543857,0.6511627906976745,0.4827586206896552
|
103 |
+
17,total,1043,1043,573,571,,,,
|
104 |
+
18,,,,,Micro avg.,0.5493767976989453,0.5008741258741258,0.52400548696845,0.5009587727708533
|
105 |
+
19,,,,,Macro avg.,0.4779618826451858,0.49349798169169373,0.4179821376155315,0.49952117090969317
|
106 |
+
topk-9,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
107 |
+
0,0,2,2,0,0,0.0,0.0,0.0,0.5
|
108 |
+
1,1,30,30,14,19,0.4666666666666667,0.42424242424242425,0.4444444444444445,0.4166666666666667
|
109 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
110 |
+
3,3,87,87,28,32,0.3218390804597701,0.4666666666666667,0.380952380952381,0.47701149425287354
|
111 |
+
4,4,94,94,28,31,0.2978723404255319,0.4745762711864407,0.3660130718954248,0.48404255319148937
|
112 |
+
5,5,62,62,13,9,0.20967741935483872,0.5909090909090909,0.30952380952380953,0.532258064516129
|
113 |
+
6,6,63,63,8,1,0.12698412698412698,0.8888888888888888,0.2222222222222222,0.5555555555555556
|
114 |
+
7,7,17,17,16,15,0.9411764705882353,0.5161290322580645,0.6666666666666666,0.5294117647058824
|
115 |
+
8,8,65,65,58,54,0.8923076923076924,0.5178571428571429,0.6553672316384181,0.5307692307692308
|
116 |
+
9,9,31,31,23,28,0.7419354838709677,0.45098039215686275,0.5609756097560976,0.41935483870967744
|
117 |
+
10,10,57,57,42,33,0.7368421052631579,0.56,0.6363636363636364,0.5789473684210527
|
118 |
+
11,11,48,48,47,48,0.9791666666666666,0.49473684210526314,0.6573426573426573,0.4895833333333333
|
119 |
+
12,12,36,36,27,23,0.75,0.54,0.627906976744186,0.5555555555555556
|
120 |
+
13,13,17,17,13,10,0.7647058823529411,0.5652173913043478,0.65,0.5882352941176471
|
121 |
+
14,14,77,77,8,10,0.1038961038961039,0.4444444444444444,0.16842105263157894,0.487012987012987
|
122 |
+
15,15,40,40,18,13,0.45,0.5806451612903226,0.5070422535211268,0.5625
|
123 |
+
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
|
124 |
+
17,total,1043,1043,660,643,,,,
|
125 |
+
18,,,,,Micro avg.,0.6327900287631831,0.5065234075211051,0.5626598465473146,0.5081495685522531
|
126 |
+
19,,,,,Macro avg.,0.5754747081668646,0.5008996322535272,0.481563255707999,0.5121708651063576
|
127 |
+
topk-7,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
128 |
+
0,0,2,2,0,0,0.0,0.0,0.0,0.5
|
129 |
+
1,1,30,30,7,11,0.23333333333333334,0.3888888888888889,0.2916666666666667,0.43333333333333335
|
130 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
131 |
+
3,3,87,87,13,13,0.14942528735632185,0.5,0.23008849557522124,0.5
|
132 |
+
4,4,94,94,11,14,0.11702127659574468,0.44,0.18487394957983191,0.48404255319148937
|
133 |
+
5,5,62,62,7,2,0.11290322580645161,0.7777777777777778,0.19718309859154928,0.5403225806451613
|
134 |
+
6,6,63,63,0,0,0.0,0.0,0.0,0.5
|
135 |
+
7,7,17,17,15,14,0.8823529411764706,0.5172413793103449,0.6521739130434783,0.5294117647058824
|
136 |
+
8,8,65,65,52,53,0.8,0.49523809523809526,0.611764705882353,0.49230769230769234
|
137 |
+
9,9,31,31,17,22,0.5483870967741935,0.4358974358974359,0.4857142857142857,0.41935483870967744
|
138 |
+
10,10,57,57,27,20,0.47368421052631576,0.574468085106383,0.5192307692307692,0.5614035087719298
|
139 |
+
11,11,48,48,43,48,0.8958333333333334,0.4725274725274725,0.618705035971223,0.4479166666666667
|
140 |
+
12,12,36,36,17,10,0.4722222222222222,0.6296296296296297,0.5396825396825397,0.5972222222222222
|
141 |
+
13,13,17,17,12,8,0.7058823529411765,0.6,0.6486486486486486,0.6176470588235294
|
142 |
+
14,14,77,77,1,5,0.012987012987012988,0.16666666666666666,0.024096385542168676,0.474025974025974
|
143 |
+
15,15,40,40,13,8,0.325,0.6190476190476191,0.42622950819672134,0.5625
|
144 |
+
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
|
145 |
+
17,total,1043,1043,552,545,,,,
|
146 |
+
18,,,,,Micro avg.,0.5292425695110259,0.5031905195989061,0.5158878504672897,0.5033557046979866
|
147 |
+
19,,,,,Macro avg.,0.4546489584148574,0.44808135588766546,0.3978465491563994,0.5093816584355034
|
148 |
+
topk-5,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
149 |
+
0,0,2,2,0,0,0.0,0.0,0.0,0.5
|
150 |
+
1,1,30,30,4,3,0.13333333333333333,0.5714285714285714,0.21621621621621623,0.5166666666666667
|
151 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
152 |
+
3,3,87,87,4,3,0.04597701149425287,0.5714285714285714,0.08510638297872342,0.5057471264367817
|
153 |
+
4,4,94,94,6,7,0.06382978723404255,0.46153846153846156,0.11214953271028037,0.4946808510638298
|
154 |
+
5,5,62,62,3,0,0.04838709677419355,1.0,0.09230769230769231,0.5241935483870968
|
155 |
+
6,6,63,63,0,0,0.0,0.0,0.0,0.5
|
156 |
+
7,7,17,17,14,11,0.8235294117647058,0.56,0.6666666666666666,0.5882352941176471
|
157 |
+
8,8,65,65,40,36,0.6153846153846154,0.5263157894736842,0.5673758865248227,0.5307692307692308
|
158 |
+
9,9,31,31,12,14,0.3870967741935484,0.46153846153846156,0.42105263157894735,0.46774193548387094
|
159 |
+
10,10,57,57,14,9,0.24561403508771928,0.6086956521739131,0.35,0.543859649122807
|
160 |
+
11,11,48,48,35,36,0.7291666666666666,0.49295774647887325,0.5882352941176471,0.4895833333333333
|
161 |
+
12,12,36,36,8,5,0.2222222222222222,0.6153846153846154,0.326530612244898,0.5416666666666666
|
162 |
+
13,13,17,17,8,3,0.47058823529411764,0.7272727272727273,0.5714285714285714,0.6470588235294118
|
163 |
+
14,14,77,77,1,2,0.012987012987012988,0.3333333333333333,0.025,0.4935064935064935
|
164 |
+
15,15,40,40,6,6,0.15,0.5,0.23076923076923075,0.5
|
165 |
+
16,16,29,29,29,29,1.0,0.5,0.6666666666666666,0.5
|
166 |
+
17,total,1043,1043,472,452,,,,
|
167 |
+
18,,,,,Micro avg.,0.45254074784276127,0.5108225108225108,0.47991865785460097,0.5095877277085331
|
168 |
+
19,,,,,Macro avg.,0.34988918837861366,0.4958761135324243,0.32859835593394293,0.5202182128872844
|
169 |
+
topk-3,class,N# of True samples,N# of False samples,True Positives,False Positives,r,p,f1,acc
|
170 |
+
0,0,2,2,0,0,0.0,0.0,0.0,0.5
|
171 |
+
1,1,30,30,1,1,0.03333333333333333,0.5,0.0625,0.5
|
172 |
+
2,2,288,288,288,288,1.0,0.5,0.6666666666666666,0.5
|
173 |
+
3,3,87,87,1,0,0.011494252873563218,1.0,0.022727272727272724,0.5057471264367817
|
174 |
+
4,4,94,94,2,4,0.02127659574468085,0.3333333333333333,0.04,0.48936170212765956
|
175 |
+
5,5,62,62,1,0,0.016129032258064516,1.0,0.031746031746031744,0.5080645161290323
|
176 |
+
6,6,63,63,0,0,0.0,0.0,0.0,0.5
|
177 |
+
7,7,17,17,10,5,0.5882352941176471,0.6666666666666666,0.625,0.6470588235294118
|
178 |
+
8,8,65,65,12,9,0.18461538461538463,0.5714285714285714,0.27906976744186046,0.5230769230769231
|
179 |
+
9,9,31,31,9,6,0.2903225806451613,0.6,0.3913043478260869,0.5483870967741935
|
180 |
+
10,10,57,57,5,3,0.08771929824561403,0.625,0.15384615384615383,0.5175438596491229
|
181 |
+
11,11,48,48,20,18,0.4166666666666667,0.5263157894736842,0.46511627906976744,0.5208333333333334
|
182 |
+
12,12,36,36,5,3,0.1388888888888889,0.625,0.2272727272727273,0.5277777777777778
|
183 |
+
13,13,17,17,3,1,0.17647058823529413,0.75,0.2857142857142857,0.5588235294117647
|
184 |
+
14,14,77,77,0,0,0.0,0.0,0.0,0.5
|
185 |
+
15,15,40,40,3,2,0.075,0.6,0.13333333333333333,0.5125
|
186 |
+
16,16,29,29,26,28,0.896551724137931,0.48148148148148145,0.6265060240963856,0.46551724137931033
|
187 |
+
17,total,1043,1043,386,368,,,,
|
188 |
+
18,,,,,Micro avg.,0.3700862895493768,0.5119363395225465,0.4296048970506399,0.5086289549376798
|
189 |
+
19,,,,,Macro avg.,0.23157080233895466,0.5164250495519845,0.2359295817494454,0.519099525272077
|