Isaak Carter Augustus committed on
Commit
9fafa58
1 Parent(s): 6d97107

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +387 -0
README.md CHANGED
@@ -17,6 +17,393 @@ JOSIE_Beta-8-7B-slerp is a merge of the following models using [LazyMergekit](ht
17
  * [HuggingFaceH4/mistral-7b-anthropic](https://huggingface.co/HuggingFaceH4/mistral-7b-anthropic)
18
  * [HuggingFaceH4/mistral-7b-grok](https://huggingface.co/HuggingFaceH4/mistral-7b-grok)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  ## 🧩 Configuration
21
 
22
  ```yaml
 
17
  * [HuggingFaceH4/mistral-7b-anthropic](https://huggingface.co/HuggingFaceH4/mistral-7b-anthropic)
18
  * [HuggingFaceH4/mistral-7b-grok](https://huggingface.co/HuggingFaceH4/mistral-7b-grok)
19
 
20
+ # Important!!!
21
+
22
+ Upon seeing the eval benchmarks on the LLM Leaderboard, this model performs the worst. The best-performing one (on the leaderboard) is the beta 3 version.
23
+
24
+ ```json
25
+ {
26
+ "all": {
27
+ "acc": 0.6212846416057433,
28
+ "acc_stderr": 0.03289607423593368,
29
+ "acc_norm": 0.6268274539918854,
30
+ "acc_norm_stderr": 0.03356884635772938,
31
+ "mc1": 0.3157894736842105,
32
+ "mc1_stderr": 0.016272287957916923,
33
+ "mc2": 0.4868797251828956,
34
+ "mc2_stderr": 0.01529943410920313
35
+ },
36
+ "harness|arc:challenge|25": {
37
+ "acc": 0.5776450511945392,
38
+ "acc_stderr": 0.014434138713379981,
39
+ "acc_norm": 0.6040955631399317,
40
+ "acc_norm_stderr": 0.014291228393536592
41
+ },
42
+ "harness|hellaswag|10": {
43
+ "acc": 0.6363274248157738,
44
+ "acc_stderr": 0.004800728138792394,
45
+ "acc_norm": 0.8365863373829915,
46
+ "acc_norm_stderr": 0.0036898701424130753
47
+ },
48
+ "harness|hendrycksTest-abstract_algebra|5": {
49
+ "acc": 0.31,
50
+ "acc_stderr": 0.046482319871173156,
51
+ "acc_norm": 0.31,
52
+ "acc_norm_stderr": 0.046482319871173156
53
+ },
54
+ "harness|hendrycksTest-anatomy|5": {
55
+ "acc": 0.6,
56
+ "acc_stderr": 0.04232073695151589,
57
+ "acc_norm": 0.6,
58
+ "acc_norm_stderr": 0.04232073695151589
59
+ },
60
+ "harness|hendrycksTest-astronomy|5": {
61
+ "acc": 0.5986842105263158,
62
+ "acc_stderr": 0.039889037033362836,
63
+ "acc_norm": 0.5986842105263158,
64
+ "acc_norm_stderr": 0.039889037033362836
65
+ },
66
+ "harness|hendrycksTest-business_ethics|5": {
67
+ "acc": 0.58,
68
+ "acc_stderr": 0.049604496374885836,
69
+ "acc_norm": 0.58,
70
+ "acc_norm_stderr": 0.049604496374885836
71
+ },
72
+ "harness|hendrycksTest-clinical_knowledge|5": {
73
+ "acc": 0.6867924528301886,
74
+ "acc_stderr": 0.028544793319055326,
75
+ "acc_norm": 0.6867924528301886,
76
+ "acc_norm_stderr": 0.028544793319055326
77
+ },
78
+ "harness|hendrycksTest-college_biology|5": {
79
+ "acc": 0.7083333333333334,
80
+ "acc_stderr": 0.038009680605548594,
81
+ "acc_norm": 0.7083333333333334,
82
+ "acc_norm_stderr": 0.038009680605548594
83
+ },
84
+ "harness|hendrycksTest-college_chemistry|5": {
85
+ "acc": 0.53,
86
+ "acc_stderr": 0.05016135580465919,
87
+ "acc_norm": 0.53,
88
+ "acc_norm_stderr": 0.05016135580465919
89
+ },
90
+ "harness|hendrycksTest-college_computer_science|5": {
91
+ "acc": 0.51,
92
+ "acc_stderr": 0.05024183937956912,
93
+ "acc_norm": 0.51,
94
+ "acc_norm_stderr": 0.05024183937956912
95
+ },
96
+ "harness|hendrycksTest-college_mathematics|5": {
97
+ "acc": 0.39,
98
+ "acc_stderr": 0.04902071300001975,
99
+ "acc_norm": 0.39,
100
+ "acc_norm_stderr": 0.04902071300001975
101
+ },
102
+ "harness|hendrycksTest-college_medicine|5": {
103
+ "acc": 0.6184971098265896,
104
+ "acc_stderr": 0.03703851193099521,
105
+ "acc_norm": 0.6184971098265896,
106
+ "acc_norm_stderr": 0.03703851193099521
107
+ },
108
+ "harness|hendrycksTest-college_physics|5": {
109
+ "acc": 0.39215686274509803,
110
+ "acc_stderr": 0.048580835742663454,
111
+ "acc_norm": 0.39215686274509803,
112
+ "acc_norm_stderr": 0.048580835742663454
113
+ },
114
+ "harness|hendrycksTest-computer_security|5": {
115
+ "acc": 0.75,
116
+ "acc_stderr": 0.04351941398892446,
117
+ "acc_norm": 0.75,
118
+ "acc_norm_stderr": 0.04351941398892446
119
+ },
120
+ "harness|hendrycksTest-conceptual_physics|5": {
121
+ "acc": 0.5659574468085107,
122
+ "acc_stderr": 0.03240038086792747,
123
+ "acc_norm": 0.5659574468085107,
124
+ "acc_norm_stderr": 0.03240038086792747
125
+ },
126
+ "harness|hendrycksTest-econometrics|5": {
127
+ "acc": 0.47368421052631576,
128
+ "acc_stderr": 0.04697085136647863,
129
+ "acc_norm": 0.47368421052631576,
130
+ "acc_norm_stderr": 0.04697085136647863
131
+ },
132
+ "harness|hendrycksTest-electrical_engineering|5": {
133
+ "acc": 0.5586206896551724,
134
+ "acc_stderr": 0.04137931034482757,
135
+ "acc_norm": 0.5586206896551724,
136
+ "acc_norm_stderr": 0.04137931034482757
137
+ },
138
+ "harness|hendrycksTest-elementary_mathematics|5": {
139
+ "acc": 0.41005291005291006,
140
+ "acc_stderr": 0.025331202438944437,
141
+ "acc_norm": 0.41005291005291006,
142
+ "acc_norm_stderr": 0.025331202438944437
143
+ },
144
+ "harness|hendrycksTest-formal_logic|5": {
145
+ "acc": 0.3888888888888889,
146
+ "acc_stderr": 0.04360314860077459,
147
+ "acc_norm": 0.3888888888888889,
148
+ "acc_norm_stderr": 0.04360314860077459
149
+ },
150
+ "harness|hendrycksTest-global_facts|5": {
151
+ "acc": 0.4,
152
+ "acc_stderr": 0.049236596391733084,
153
+ "acc_norm": 0.4,
154
+ "acc_norm_stderr": 0.049236596391733084
155
+ },
156
+ "harness|hendrycksTest-high_school_biology|5": {
157
+ "acc": 0.7580645161290323,
158
+ "acc_stderr": 0.024362599693031083,
159
+ "acc_norm": 0.7580645161290323,
160
+ "acc_norm_stderr": 0.024362599693031083
161
+ },
162
+ "harness|hendrycksTest-high_school_chemistry|5": {
163
+ "acc": 0.5221674876847291,
164
+ "acc_stderr": 0.03514528562175008,
165
+ "acc_norm": 0.5221674876847291,
166
+ "acc_norm_stderr": 0.03514528562175008
167
+ },
168
+ "harness|hendrycksTest-high_school_computer_science|5": {
169
+ "acc": 0.63,
170
+ "acc_stderr": 0.04852365870939099,
171
+ "acc_norm": 0.63,
172
+ "acc_norm_stderr": 0.04852365870939099
173
+ },
174
+ "harness|hendrycksTest-high_school_european_history|5": {
175
+ "acc": 0.7515151515151515,
176
+ "acc_stderr": 0.033744026441394036,
177
+ "acc_norm": 0.7515151515151515,
178
+ "acc_norm_stderr": 0.033744026441394036
179
+ },
180
+ "harness|hendrycksTest-high_school_geography|5": {
181
+ "acc": 0.7727272727272727,
182
+ "acc_stderr": 0.029857515673386417,
183
+ "acc_norm": 0.7727272727272727,
184
+ "acc_norm_stderr": 0.029857515673386417
185
+ },
186
+ "harness|hendrycksTest-high_school_government_and_politics|5": {
187
+ "acc": 0.8497409326424871,
188
+ "acc_stderr": 0.025787723180723875,
189
+ "acc_norm": 0.8497409326424871,
190
+ "acc_norm_stderr": 0.025787723180723875
191
+ },
192
+ "harness|hendrycksTest-high_school_macroeconomics|5": {
193
+ "acc": 0.6384615384615384,
194
+ "acc_stderr": 0.024359581465396997,
195
+ "acc_norm": 0.6384615384615384,
196
+ "acc_norm_stderr": 0.024359581465396997
197
+ },
198
+ "harness|hendrycksTest-high_school_mathematics|5": {
199
+ "acc": 0.337037037037037,
200
+ "acc_stderr": 0.028820884666253255,
201
+ "acc_norm": 0.337037037037037,
202
+ "acc_norm_stderr": 0.028820884666253255
203
+ },
204
+ "harness|hendrycksTest-high_school_microeconomics|5": {
205
+ "acc": 0.6764705882352942,
206
+ "acc_stderr": 0.03038835355188679,
207
+ "acc_norm": 0.6764705882352942,
208
+ "acc_norm_stderr": 0.03038835355188679
209
+ },
210
+ "harness|hendrycksTest-high_school_physics|5": {
211
+ "acc": 0.3443708609271523,
212
+ "acc_stderr": 0.038796870240733264,
213
+ "acc_norm": 0.3443708609271523,
214
+ "acc_norm_stderr": 0.038796870240733264
215
+ },
216
+ "harness|hendrycksTest-high_school_psychology|5": {
217
+ "acc": 0.8055045871559633,
218
+ "acc_stderr": 0.01697028909045804,
219
+ "acc_norm": 0.8055045871559633,
220
+ "acc_norm_stderr": 0.01697028909045804
221
+ },
222
+ "harness|hendrycksTest-high_school_statistics|5": {
223
+ "acc": 0.5370370370370371,
224
+ "acc_stderr": 0.03400603625538272,
225
+ "acc_norm": 0.5370370370370371,
226
+ "acc_norm_stderr": 0.03400603625538272
227
+ },
228
+ "harness|hendrycksTest-high_school_us_history|5": {
229
+ "acc": 0.7794117647058824,
230
+ "acc_stderr": 0.02910225438967407,
231
+ "acc_norm": 0.7794117647058824,
232
+ "acc_norm_stderr": 0.02910225438967407
233
+ },
234
+ "harness|hendrycksTest-high_school_world_history|5": {
235
+ "acc": 0.759493670886076,
236
+ "acc_stderr": 0.027820781981149685,
237
+ "acc_norm": 0.759493670886076,
238
+ "acc_norm_stderr": 0.027820781981149685
239
+ },
240
+ "harness|hendrycksTest-human_aging|5": {
241
+ "acc": 0.6636771300448431,
242
+ "acc_stderr": 0.031708824268455,
243
+ "acc_norm": 0.6636771300448431,
244
+ "acc_norm_stderr": 0.031708824268455
245
+ },
246
+ "harness|hendrycksTest-human_sexuality|5": {
247
+ "acc": 0.7251908396946565,
248
+ "acc_stderr": 0.03915345408847836,
249
+ "acc_norm": 0.7251908396946565,
250
+ "acc_norm_stderr": 0.03915345408847836
251
+ },
252
+ "harness|hendrycksTest-international_law|5": {
253
+ "acc": 0.71900826446281,
254
+ "acc_stderr": 0.04103203830514512,
255
+ "acc_norm": 0.71900826446281,
256
+ "acc_norm_stderr": 0.04103203830514512
257
+ },
258
+ "harness|hendrycksTest-jurisprudence|5": {
259
+ "acc": 0.7685185185185185,
260
+ "acc_stderr": 0.04077494709252626,
261
+ "acc_norm": 0.7685185185185185,
262
+ "acc_norm_stderr": 0.04077494709252626
263
+ },
264
+ "harness|hendrycksTest-logical_fallacies|5": {
265
+ "acc": 0.7177914110429447,
266
+ "acc_stderr": 0.03536117886664743,
267
+ "acc_norm": 0.7177914110429447,
268
+ "acc_norm_stderr": 0.03536117886664743
269
+ },
270
+ "harness|hendrycksTest-machine_learning|5": {
271
+ "acc": 0.41964285714285715,
272
+ "acc_stderr": 0.04684099321077106,
273
+ "acc_norm": 0.41964285714285715,
274
+ "acc_norm_stderr": 0.04684099321077106
275
+ },
276
+ "harness|hendrycksTest-management|5": {
277
+ "acc": 0.7766990291262136,
278
+ "acc_stderr": 0.04123553189891431,
279
+ "acc_norm": 0.7766990291262136,
280
+ "acc_norm_stderr": 0.04123553189891431
281
+ },
282
+ "harness|hendrycksTest-marketing|5": {
283
+ "acc": 0.8974358974358975,
284
+ "acc_stderr": 0.019875655027867447,
285
+ "acc_norm": 0.8974358974358975,
286
+ "acc_norm_stderr": 0.019875655027867447
287
+ },
288
+ "harness|hendrycksTest-medical_genetics|5": {
289
+ "acc": 0.71,
290
+ "acc_stderr": 0.045604802157206845,
291
+ "acc_norm": 0.71,
292
+ "acc_norm_stderr": 0.045604802157206845
293
+ },
294
+ "harness|hendrycksTest-miscellaneous|5": {
295
+ "acc": 0.7918263090676884,
296
+ "acc_stderr": 0.014518592248904033,
297
+ "acc_norm": 0.7918263090676884,
298
+ "acc_norm_stderr": 0.014518592248904033
299
+ },
300
+ "harness|hendrycksTest-moral_disputes|5": {
301
+ "acc": 0.7052023121387283,
302
+ "acc_stderr": 0.024547617794803828,
303
+ "acc_norm": 0.7052023121387283,
304
+ "acc_norm_stderr": 0.024547617794803828
305
+ },
306
+ "harness|hendrycksTest-moral_scenarios|5": {
307
+ "acc": 0.4044692737430168,
308
+ "acc_stderr": 0.016414440917293147,
309
+ "acc_norm": 0.4044692737430168,
310
+ "acc_norm_stderr": 0.016414440917293147
311
+ },
312
+ "harness|hendrycksTest-nutrition|5": {
313
+ "acc": 0.7091503267973857,
314
+ "acc_stderr": 0.02600480036395213,
315
+ "acc_norm": 0.7091503267973857,
316
+ "acc_norm_stderr": 0.02600480036395213
317
+ },
318
+ "harness|hendrycksTest-philosophy|5": {
319
+ "acc": 0.707395498392283,
320
+ "acc_stderr": 0.02583989833487798,
321
+ "acc_norm": 0.707395498392283,
322
+ "acc_norm_stderr": 0.02583989833487798
323
+ },
324
+ "harness|hendrycksTest-prehistory|5": {
325
+ "acc": 0.6944444444444444,
326
+ "acc_stderr": 0.025630824975621355,
327
+ "acc_norm": 0.6944444444444444,
328
+ "acc_norm_stderr": 0.025630824975621355
329
+ },
330
+ "harness|hendrycksTest-professional_accounting|5": {
331
+ "acc": 0.4716312056737589,
332
+ "acc_stderr": 0.029779450957303055,
333
+ "acc_norm": 0.4716312056737589,
334
+ "acc_norm_stderr": 0.029779450957303055
335
+ },
336
+ "harness|hendrycksTest-professional_law|5": {
337
+ "acc": 0.4302477183833116,
338
+ "acc_stderr": 0.012645361435115233,
339
+ "acc_norm": 0.4302477183833116,
340
+ "acc_norm_stderr": 0.012645361435115233
341
+ },
342
+ "harness|hendrycksTest-professional_medicine|5": {
343
+ "acc": 0.6397058823529411,
344
+ "acc_stderr": 0.02916312857067073,
345
+ "acc_norm": 0.6397058823529411,
346
+ "acc_norm_stderr": 0.02916312857067073
347
+ },
348
+ "harness|hendrycksTest-professional_psychology|5": {
349
+ "acc": 0.6470588235294118,
350
+ "acc_stderr": 0.01933314202079716,
351
+ "acc_norm": 0.6470588235294118,
352
+ "acc_norm_stderr": 0.01933314202079716
353
+ },
354
+ "harness|hendrycksTest-public_relations|5": {
355
+ "acc": 0.6363636363636364,
356
+ "acc_stderr": 0.04607582090719976,
357
+ "acc_norm": 0.6363636363636364,
358
+ "acc_norm_stderr": 0.04607582090719976
359
+ },
360
+ "harness|hendrycksTest-security_studies|5": {
361
+ "acc": 0.6775510204081633,
362
+ "acc_stderr": 0.029923100563683906,
363
+ "acc_norm": 0.6775510204081633,
364
+ "acc_norm_stderr": 0.029923100563683906
365
+ },
366
+ "harness|hendrycksTest-sociology|5": {
367
+ "acc": 0.8208955223880597,
368
+ "acc_stderr": 0.027113286753111844,
369
+ "acc_norm": 0.8208955223880597,
370
+ "acc_norm_stderr": 0.027113286753111844
371
+ },
372
+ "harness|hendrycksTest-us_foreign_policy|5": {
373
+ "acc": 0.85,
374
+ "acc_stderr": 0.03588702812826371,
375
+ "acc_norm": 0.85,
376
+ "acc_norm_stderr": 0.03588702812826371
377
+ },
378
+ "harness|hendrycksTest-virology|5": {
379
+ "acc": 0.5,
380
+ "acc_stderr": 0.03892494720807614,
381
+ "acc_norm": 0.5,
382
+ "acc_norm_stderr": 0.03892494720807614
383
+ },
384
+ "harness|hendrycksTest-world_religions|5": {
385
+ "acc": 0.8245614035087719,
386
+ "acc_stderr": 0.029170885500727665,
387
+ "acc_norm": 0.8245614035087719,
388
+ "acc_norm_stderr": 0.029170885500727665
389
+ },
390
+ "harness|truthfulqa:mc|0": {
391
+ "mc1": 0.3157894736842105,
392
+ "mc1_stderr": 0.016272287957916923,
393
+ "mc2": 0.4868797251828956,
394
+ "mc2_stderr": 0.01529943410920313
395
+ },
396
+ "harness|winogrande|5": {
397
+ "acc": 0.7813733228097869,
398
+ "acc_stderr": 0.011616198215773239
399
+ },
400
+ "harness|gsm8k|5": {
401
+ "acc": 0.36087945413191813,
402
+ "acc_stderr": 0.013228626753925143
403
+ }
404
+ }
405
+ ```
406
+
407
  ## 🧩 Configuration
408
 
409
  ```yaml