updated metadata (eval scores)

#1
by rootacess - opened
Files changed (1) hide show
  1. README.md +21 -21
README.md CHANGED
@@ -29,7 +29,7 @@ model-index:
29
  metrics:
30
  - name: pass@1
31
  type: pass@1
32
- value: 46.2
33
  verified: false
34
  - task:
35
  type: text-generation
@@ -39,7 +39,7 @@ model-index:
39
  metrics:
40
  - name: pass@1
41
  type: pass@1
42
- value: 39.2
43
  verified: false
44
  - task:
45
  type: text-generation
@@ -49,7 +49,7 @@ model-index:
49
  metrics:
50
  - name: pass@1
51
  type: pass@1
52
- value: 38.2
53
  verified: false
54
  - task:
55
  type: text-generation
@@ -59,7 +59,7 @@ model-index:
59
  metrics:
60
  - name: pass@1
61
  type: pass@1
62
- value: 30.4
63
  verified: false
64
  - task:
65
  type: text-generation
@@ -69,7 +69,7 @@ model-index:
69
  metrics:
70
  - name: pass@1
71
  type: pass@1
72
- value: 35.6
73
  verified: false
74
  - task:
75
  type: text-generation
@@ -79,7 +79,7 @@ model-index:
79
  metrics:
80
  - name: pass@1
81
  type: pass@1
82
- value: 23.4
83
  verified: false
84
  - task:
85
  type: text-generation
@@ -89,7 +89,7 @@ model-index:
89
  metrics:
90
  - name: pass@1
91
  type: pass@1
92
- value: 35.5
93
  verified: false
94
  - task:
95
  type: text-generation
@@ -99,7 +99,7 @@ model-index:
99
  metrics:
100
  - name: pass@1
101
  type: pass@1
102
- value: 30.2
103
  verified: false
104
  - task:
105
  type: text-generation
@@ -109,7 +109,7 @@ model-index:
109
  metrics:
110
  - name: pass@1
111
  type: pass@1
112
- value: 28.4
113
  verified: false
114
  - task:
115
  type: text-generation
@@ -119,7 +119,7 @@ model-index:
119
  metrics:
120
  - name: pass@1
121
  type: pass@1
122
- value: 30.6
123
  verified: false
124
  - task:
125
  type: text-generation
@@ -129,7 +129,7 @@ model-index:
129
  metrics:
130
  - name: pass@1
131
  type: pass@1
132
- value: 30.2
133
  verified: false
134
  - task:
135
  type: text-generation
@@ -139,7 +139,7 @@ model-index:
139
  metrics:
140
  - name: pass@1
141
  type: pass@1
142
- value: 26.1
143
  verified: false
144
  - task:
145
  type: text-generation
@@ -149,7 +149,7 @@ model-index:
149
  metrics:
150
  - name: pass@1
151
  type: pass@1
152
- value: 16.5
153
  verified: false
154
  - task:
155
  type: text-generation
@@ -159,7 +159,7 @@ model-index:
159
  metrics:
160
  - name: pass@1
161
  type: pass@1
162
- value: 27.0
163
  verified: false
164
  - task:
165
  type: text-generation
@@ -169,7 +169,7 @@ model-index:
169
  metrics:
170
  - name: pass@1
171
  type: pass@1
172
- value: 35.1
173
  verified: false
174
  - task:
175
  type: text-generation
@@ -179,7 +179,7 @@ model-index:
179
  metrics:
180
  - name: pass@1
181
  type: pass@1
182
- value: 24.5
183
  verified: false
184
  - task:
185
  type: text-generation
@@ -189,7 +189,7 @@ model-index:
189
  metrics:
190
  - name: pass@1
191
  type: pass@1
192
- value: 27.3
193
  verified: false
194
  - task:
195
  type: text-generation
@@ -199,7 +199,7 @@ model-index:
199
  metrics:
200
  - name: pass@1
201
  type: pass@1
202
- value: 21.1
203
  verified: false
204
  - task:
205
  type: text-generation
@@ -209,7 +209,7 @@ model-index:
209
  metrics:
210
  - name: pass@1
211
  type: pass@1
212
- value: 24.1
213
  verified: false
214
  - task:
215
  type: text-generation
@@ -219,7 +219,7 @@ model-index:
219
  metrics:
220
  - name: pass@1
221
  type: pass@1
222
- value: 14.8
223
  verified: false
224
  - task:
225
  type: text-generation
@@ -229,7 +229,7 @@ model-index:
229
  metrics:
230
  - name: pass@1
231
  type: pass@1
232
- value: 24.5
233
  verified: false
234
  ---
235
 
 
29
  metrics:
30
  - name: pass@1
31
  type: pass@1
32
+ value: 44.7
33
  verified: false
34
  - task:
35
  type: text-generation
 
39
  metrics:
40
  - name: pass@1
41
  type: pass@1
42
+ value: 33.8
43
  verified: false
44
  - task:
45
  type: text-generation
 
49
  metrics:
50
  - name: pass@1
51
  type: pass@1
52
+ value: 36.9
53
  verified: false
54
  - task:
55
  type: text-generation
 
59
  metrics:
60
  - name: pass@1
61
  type: pass@1
62
+ value: 21.9
63
  verified: false
64
  - task:
65
  type: text-generation
 
69
  metrics:
70
  - name: pass@1
71
  type: pass@1
72
+ value: 32.3
73
  verified: false
74
  - task:
75
  type: text-generation
 
79
  metrics:
80
  - name: pass@1
81
  type: pass@1
82
+ value: 25.7
83
  verified: false
84
  - task:
85
  type: text-generation
 
89
  metrics:
90
  - name: pass@1
91
  type: pass@1
92
+ value: 30.9
93
  verified: false
94
  - task:
95
  type: text-generation
 
99
  metrics:
100
  - name: pass@1
101
  type: pass@1
102
+ value: 28.1
103
  verified: false
104
  - task:
105
  type: text-generation
 
109
  metrics:
110
  - name: pass@1
111
  type: pass@1
112
+ value: 27.7
113
  verified: false
114
  - task:
115
  type: text-generation
 
119
  metrics:
120
  - name: pass@1
121
  type: pass@1
122
+ value: 30.4
123
  verified: false
124
  - task:
125
  type: text-generation
 
129
  metrics:
130
  - name: pass@1
131
  type: pass@1
132
+ value: 27.6
133
  verified: false
134
  - task:
135
  type: text-generation
 
139
  metrics:
140
  - name: pass@1
141
  type: pass@1
142
+ value: 22.9
143
  verified: false
144
  - task:
145
  type: text-generation
 
149
  metrics:
150
  - name: pass@1
151
  type: pass@1
152
+ value: 9.6
153
  verified: false
154
  - task:
155
  type: text-generation
 
159
  metrics:
160
  - name: pass@1
161
  type: pass@1
162
+ value: 24.4
163
  verified: false
164
  - task:
165
  type: text-generation
 
169
  metrics:
170
  - name: pass@1
171
  type: pass@1
172
+ value: 30.4
173
  verified: false
174
  - task:
175
  type: text-generation
 
179
  metrics:
180
  - name: pass@1
181
  type: pass@1
182
+ value: 24.0
183
  verified: false
184
  - task:
185
  type: text-generation
 
189
  metrics:
190
  - name: pass@1
191
  type: pass@1
192
+ value: 24.7
193
  verified: false
194
  - task:
195
  type: text-generation
 
199
  metrics:
200
  - name: pass@1
201
  type: pass@1
202
+ value: 21.7
203
  verified: false
204
  - task:
205
  type: text-generation
 
209
  metrics:
210
  - name: pass@1
211
  type: pass@1
212
+ value: 21.0
213
  verified: false
214
  - task:
215
  type: text-generation
 
219
  metrics:
220
  - name: pass@1
221
  type: pass@1
222
+ value: 15.9
223
  verified: false
224
  - task:
225
  type: text-generation
 
229
  metrics:
230
  - name: pass@1
231
  type: pass@1
232
+ value: 22.9
233
  verified: false
234
  ---
235