mychen76 committed on
Commit
77ed3fc
1 Parent(s): 3b74bfb

Update result

Files changed (1)
  1. README.md +34 -383
README.md CHANGED
@@ -43,387 +43,38 @@ experts:
  tokenizer_source: union
  ```

- Evaluation Result:
- https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__openmixtral-4x7b-merged
- ```
- {
-     "all": {
-         "acc": 0.657144834577193,
-         "acc_stderr": 0.03198053543647407,
-         "acc_norm": 0.6572006879598793,
-         "acc_norm_stderr": 0.0326392415851668,
-         "mc1": 0.44430844553243576,
-         "mc1_stderr": 0.01739458625074317,
-         "mc2": 0.6132594486430695,
-         "mc2_stderr": 0.015532509494332434
-     },
-     "harness|arc:challenge|25": {
-         "acc": 0.6604095563139932,
-         "acc_stderr": 0.013839039762820166,
-         "acc_norm": 0.6945392491467577,
-         "acc_norm_stderr": 0.01346008047800251
-     },
-     "harness|hellaswag|10": {
-         "acc": 0.6901015733917546,
-         "acc_stderr": 0.004615063817741861,
-         "acc_norm": 0.8674566819358693,
-         "acc_norm_stderr": 0.0033838751726700217
-     },
-     "harness|hendrycksTest-abstract_algebra|5": {
-         "acc": 0.38,
-         "acc_stderr": 0.048783173121456316,
-         "acc_norm": 0.38,
-         "acc_norm_stderr": 0.048783173121456316
-     },
-     "harness|hendrycksTest-anatomy|5": {
-         "acc": 0.6444444444444445,
-         "acc_stderr": 0.04135176749720385,
-         "acc_norm": 0.6444444444444445,
-         "acc_norm_stderr": 0.04135176749720385
-     },
-     "harness|hendrycksTest-astronomy|5": {
-         "acc": 0.6776315789473685,
-         "acc_stderr": 0.03803510248351585,
-         "acc_norm": 0.6776315789473685,
-         "acc_norm_stderr": 0.03803510248351585
-     },
-     "harness|hendrycksTest-business_ethics|5": {
-         "acc": 0.63,
-         "acc_stderr": 0.04852365870939099,
-         "acc_norm": 0.63,
-         "acc_norm_stderr": 0.04852365870939099
-     },
-     "harness|hendrycksTest-clinical_knowledge|5": {
-         "acc": 0.7358490566037735,
-         "acc_stderr": 0.027134291628741713,
-         "acc_norm": 0.7358490566037735,
-         "acc_norm_stderr": 0.027134291628741713
-     },
-     "harness|hendrycksTest-college_biology|5": {
-         "acc": 0.7777777777777778,
-         "acc_stderr": 0.03476590104304134,
-         "acc_norm": 0.7777777777777778,
-         "acc_norm_stderr": 0.03476590104304134
-     },
-     "harness|hendrycksTest-college_chemistry|5": {
-         "acc": 0.49,
-         "acc_stderr": 0.05024183937956911,
-         "acc_norm": 0.49,
-         "acc_norm_stderr": 0.05024183937956911
-     },
-     "harness|hendrycksTest-college_computer_science|5": {
-         "acc": 0.57,
-         "acc_stderr": 0.04975698519562428,
-         "acc_norm": 0.57,
-         "acc_norm_stderr": 0.04975698519562428
-     },
-     "harness|hendrycksTest-college_mathematics|5": {
-         "acc": 0.35,
-         "acc_stderr": 0.047937248544110196,
-         "acc_norm": 0.35,
-         "acc_norm_stderr": 0.047937248544110196
-     },
-     "harness|hendrycksTest-college_medicine|5": {
-         "acc": 0.6763005780346821,
-         "acc_stderr": 0.0356760379963917,
-         "acc_norm": 0.6763005780346821,
-         "acc_norm_stderr": 0.0356760379963917
-     },
-     "harness|hendrycksTest-college_physics|5": {
-         "acc": 0.4117647058823529,
-         "acc_stderr": 0.048971049527263666,
-         "acc_norm": 0.4117647058823529,
-         "acc_norm_stderr": 0.048971049527263666
-     },
-     "harness|hendrycksTest-computer_security|5": {
-         "acc": 0.76,
-         "acc_stderr": 0.04292346959909283,
-         "acc_norm": 0.76,
-         "acc_norm_stderr": 0.04292346959909283
-     },
-     "harness|hendrycksTest-conceptual_physics|5": {
-         "acc": 0.5531914893617021,
-         "acc_stderr": 0.0325005368436584,
-         "acc_norm": 0.5531914893617021,
-         "acc_norm_stderr": 0.0325005368436584
-     },
-     "harness|hendrycksTest-econometrics|5": {
-         "acc": 0.5,
-         "acc_stderr": 0.047036043419179864,
-         "acc_norm": 0.5,
-         "acc_norm_stderr": 0.047036043419179864
-     },
-     "harness|hendrycksTest-electrical_engineering|5": {
-         "acc": 0.5655172413793104,
-         "acc_stderr": 0.04130740879555497,
-         "acc_norm": 0.5655172413793104,
-         "acc_norm_stderr": 0.04130740879555497
-     },
-     "harness|hendrycksTest-elementary_mathematics|5": {
-         "acc": 0.42328042328042326,
-         "acc_stderr": 0.025446365634406793,
-         "acc_norm": 0.42328042328042326,
-         "acc_norm_stderr": 0.025446365634406793
-     },
-     "harness|hendrycksTest-formal_logic|5": {
-         "acc": 0.47619047619047616,
-         "acc_stderr": 0.04467062628403273,
-         "acc_norm": 0.47619047619047616,
-         "acc_norm_stderr": 0.04467062628403273
-     },
-     "harness|hendrycksTest-global_facts|5": {
-         "acc": 0.33,
-         "acc_stderr": 0.047258156262526045,
-         "acc_norm": 0.33,
-         "acc_norm_stderr": 0.047258156262526045
-     },
-     "harness|hendrycksTest-high_school_biology|5": {
-         "acc": 0.8064516129032258,
-         "acc_stderr": 0.022475258525536057,
-         "acc_norm": 0.8064516129032258,
-         "acc_norm_stderr": 0.022475258525536057
-     },
-     "harness|hendrycksTest-high_school_chemistry|5": {
-         "acc": 0.5123152709359606,
-         "acc_stderr": 0.035169204442208966,
-         "acc_norm": 0.5123152709359606,
-         "acc_norm_stderr": 0.035169204442208966
-     },
-     "harness|hendrycksTest-high_school_computer_science|5": {
-         "acc": 0.72,
-         "acc_stderr": 0.04512608598542127,
-         "acc_norm": 0.72,
-         "acc_norm_stderr": 0.04512608598542127
-     },
-     "harness|hendrycksTest-high_school_european_history|5": {
-         "acc": 0.7757575757575758,
-         "acc_stderr": 0.03256866661681102,
-         "acc_norm": 0.7757575757575758,
-         "acc_norm_stderr": 0.03256866661681102
-     },
-     "harness|hendrycksTest-high_school_geography|5": {
-         "acc": 0.8080808080808081,
-         "acc_stderr": 0.028057791672989017,
-         "acc_norm": 0.8080808080808081,
-         "acc_norm_stderr": 0.028057791672989017
-     },
-     "harness|hendrycksTest-high_school_government_and_politics|5": {
-         "acc": 0.8963730569948186,
-         "acc_stderr": 0.02199531196364424,
-         "acc_norm": 0.8963730569948186,
-         "acc_norm_stderr": 0.02199531196364424
-     },
-     "harness|hendrycksTest-high_school_macroeconomics|5": {
-         "acc": 0.6666666666666666,
-         "acc_stderr": 0.023901157979402534,
-         "acc_norm": 0.6666666666666666,
-         "acc_norm_stderr": 0.023901157979402534
-     },
-     "harness|hendrycksTest-high_school_mathematics|5": {
-         "acc": 0.34814814814814815,
-         "acc_stderr": 0.029045600290616255,
-         "acc_norm": 0.34814814814814815,
-         "acc_norm_stderr": 0.029045600290616255
-     },
-     "harness|hendrycksTest-high_school_microeconomics|5": {
-         "acc": 0.7142857142857143,
-         "acc_stderr": 0.029344572500634335,
-         "acc_norm": 0.7142857142857143,
-         "acc_norm_stderr": 0.029344572500634335
-     },
-     "harness|hendrycksTest-high_school_physics|5": {
-         "acc": 0.31788079470198677,
-         "acc_stderr": 0.038020397601079024,
-         "acc_norm": 0.31788079470198677,
-         "acc_norm_stderr": 0.038020397601079024
-     },
-     "harness|hendrycksTest-high_school_psychology|5": {
-         "acc": 0.8495412844036697,
-         "acc_stderr": 0.015328563932669237,
-         "acc_norm": 0.8495412844036697,
-         "acc_norm_stderr": 0.015328563932669237
-     },
-     "harness|hendrycksTest-high_school_statistics|5": {
-         "acc": 0.5231481481481481,
-         "acc_stderr": 0.03406315360711507,
-         "acc_norm": 0.5231481481481481,
-         "acc_norm_stderr": 0.03406315360711507
-     },
-     "harness|hendrycksTest-high_school_us_history|5": {
-         "acc": 0.8480392156862745,
-         "acc_stderr": 0.025195658428931796,
-         "acc_norm": 0.8480392156862745,
-         "acc_norm_stderr": 0.025195658428931796
-     },
-     "harness|hendrycksTest-high_school_world_history|5": {
-         "acc": 0.810126582278481,
-         "acc_stderr": 0.025530100460233504,
-         "acc_norm": 0.810126582278481,
-         "acc_norm_stderr": 0.025530100460233504
-     },
-     "harness|hendrycksTest-human_aging|5": {
-         "acc": 0.695067264573991,
-         "acc_stderr": 0.030898610882477515,
-         "acc_norm": 0.695067264573991,
-         "acc_norm_stderr": 0.030898610882477515
-     },
-     "harness|hendrycksTest-human_sexuality|5": {
-         "acc": 0.7709923664122137,
-         "acc_stderr": 0.036853466317118506,
-         "acc_norm": 0.7709923664122137,
-         "acc_norm_stderr": 0.036853466317118506
-     },
-     "harness|hendrycksTest-international_law|5": {
-         "acc": 0.8016528925619835,
-         "acc_stderr": 0.03640118271990947,
-         "acc_norm": 0.8016528925619835,
-         "acc_norm_stderr": 0.03640118271990947
-     },
-     "harness|hendrycksTest-jurisprudence|5": {
-         "acc": 0.8240740740740741,
-         "acc_stderr": 0.036809181416738807,
-         "acc_norm": 0.8240740740740741,
-         "acc_norm_stderr": 0.036809181416738807
-     },
-     "harness|hendrycksTest-logical_fallacies|5": {
-         "acc": 0.754601226993865,
-         "acc_stderr": 0.03380939813943354,
-         "acc_norm": 0.754601226993865,
-         "acc_norm_stderr": 0.03380939813943354
-     },
-     "harness|hendrycksTest-machine_learning|5": {
-         "acc": 0.4642857142857143,
-         "acc_stderr": 0.04733667890053756,
-         "acc_norm": 0.4642857142857143,
-         "acc_norm_stderr": 0.04733667890053756
-     },
-     "harness|hendrycksTest-management|5": {
-         "acc": 0.7961165048543689,
-         "acc_stderr": 0.039891398595317706,
-         "acc_norm": 0.7961165048543689,
-         "acc_norm_stderr": 0.039891398595317706
-     },
-     "harness|hendrycksTest-marketing|5": {
-         "acc": 0.8846153846153846,
-         "acc_stderr": 0.020930193185179326,
-         "acc_norm": 0.8846153846153846,
-         "acc_norm_stderr": 0.020930193185179326
-     },
-     "harness|hendrycksTest-medical_genetics|5": {
-         "acc": 0.72,
-         "acc_stderr": 0.045126085985421276,
-         "acc_norm": 0.72,
-         "acc_norm_stderr": 0.045126085985421276
-     },
-     "harness|hendrycksTest-miscellaneous|5": {
-         "acc": 0.8288633461047255,
-         "acc_stderr": 0.0134682016140663,
-         "acc_norm": 0.8288633461047255,
-         "acc_norm_stderr": 0.0134682016140663
-     },
-     "harness|hendrycksTest-moral_disputes|5": {
-         "acc": 0.7369942196531792,
-         "acc_stderr": 0.023703099525258172,
-         "acc_norm": 0.7369942196531792,
-         "acc_norm_stderr": 0.023703099525258172
-     },
-     "harness|hendrycksTest-moral_scenarios|5": {
-         "acc": 0.41564245810055866,
-         "acc_stderr": 0.016482782187500676,
-         "acc_norm": 0.41564245810055866,
-         "acc_norm_stderr": 0.016482782187500676
-     },
-     "harness|hendrycksTest-nutrition|5": {
-         "acc": 0.7320261437908496,
-         "acc_stderr": 0.025360603796242557,
-         "acc_norm": 0.7320261437908496,
-         "acc_norm_stderr": 0.025360603796242557
-     },
-     "harness|hendrycksTest-philosophy|5": {
-         "acc": 0.7363344051446945,
-         "acc_stderr": 0.02502553850053234,
-         "acc_norm": 0.7363344051446945,
-         "acc_norm_stderr": 0.02502553850053234
-     },
-     "harness|hendrycksTest-prehistory|5": {
-         "acc": 0.7376543209876543,
-         "acc_stderr": 0.024477222856135107,
-         "acc_norm": 0.7376543209876543,
-         "acc_norm_stderr": 0.024477222856135107
-     },
-     "harness|hendrycksTest-professional_accounting|5": {
-         "acc": 0.5141843971631206,
-         "acc_stderr": 0.02981549448368206,
-         "acc_norm": 0.5141843971631206,
-         "acc_norm_stderr": 0.02981549448368206
-     },
-     "harness|hendrycksTest-professional_law|5": {
-         "acc": 0.46740547588005216,
-         "acc_stderr": 0.012743072942653349,
-         "acc_norm": 0.46740547588005216,
-         "acc_norm_stderr": 0.012743072942653349
-     },
-     "harness|hendrycksTest-professional_medicine|5": {
-         "acc": 0.6801470588235294,
-         "acc_stderr": 0.028332959514031218,
-         "acc_norm": 0.6801470588235294,
-         "acc_norm_stderr": 0.028332959514031218
-     },
-     "harness|hendrycksTest-professional_psychology|5": {
-         "acc": 0.6699346405228758,
-         "acc_stderr": 0.019023726160724553,
-         "acc_norm": 0.6699346405228758,
-         "acc_norm_stderr": 0.019023726160724553
-     },
-     "harness|hendrycksTest-public_relations|5": {
-         "acc": 0.6727272727272727,
-         "acc_stderr": 0.0449429086625209,
-         "acc_norm": 0.6727272727272727,
-         "acc_norm_stderr": 0.0449429086625209
-     },
-     "harness|hendrycksTest-security_studies|5": {
-         "acc": 0.7142857142857143,
-         "acc_stderr": 0.0289205832206756,
-         "acc_norm": 0.7142857142857143,
-         "acc_norm_stderr": 0.0289205832206756
-     },
-     "harness|hendrycksTest-sociology|5": {
-         "acc": 0.845771144278607,
-         "acc_stderr": 0.025538433368578337,
-         "acc_norm": 0.845771144278607,
-         "acc_norm_stderr": 0.025538433368578337
-     },
-     "harness|hendrycksTest-us_foreign_policy|5": {
-         "acc": 0.84,
-         "acc_stderr": 0.03684529491774708,
-         "acc_norm": 0.84,
-         "acc_norm_stderr": 0.03684529491774708
-     },
-     "harness|hendrycksTest-virology|5": {
-         "acc": 0.536144578313253,
-         "acc_stderr": 0.038823108508905954,
-         "acc_norm": 0.536144578313253,
-         "acc_norm_stderr": 0.038823108508905954
-     },
-     "harness|hendrycksTest-world_religions|5": {
-         "acc": 0.8245614035087719,
-         "acc_stderr": 0.02917088550072767,
-         "acc_norm": 0.8245614035087719,
-         "acc_norm_stderr": 0.02917088550072767
-     },
-     "harness|truthfulqa:mc|0": {
-         "mc1": 0.44430844553243576,
-         "mc1_stderr": 0.01739458625074317,
-         "mc2": 0.6132594486430695,
-         "mc2_stderr": 0.015532509494332434
-     },
-     "harness|winogrande|5": {
-         "acc": 0.8105761641673244,
-         "acc_stderr": 0.011012790432989245
-     },
-     "harness|gsm8k|5": {
-         "acc": 0.7119029567854435,
-         "acc_stderr": 0.012474469737197916
-     }
- }
+ ## 💻 Usage
+ ```python
+ !pip install -qU transformers bitsandbytes accelerate
+
+ from transformers import AutoTokenizer
+ import transformers
+ import torch
+
+ model = "openmixtral-4x7b-merged"
+
+ tokenizer = AutoTokenizer.from_pretrained(model)
+ pipeline = transformers.pipeline(
+     "text-generation",
+     model=model,
+     model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
+ )
+
+ messages = [{"role": "user", "content": "Why the sky is blue."}]
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+ print(outputs[0]["generated_text"])
  ```
+
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__openmixtral-4x7b-merged)
+
+ | Metric                          |Value|
+ |---------------------------------|----:|
+ |Avg.                             |72.51|
+ |AI2 Reasoning Challenge (25-Shot)|69.45|
+ |HellaSwag (10-Shot)              |86.75|
+ |MMLU (5-Shot)                    |65.29|
+ |TruthfulQA (0-shot)              |61.33|
+ |Winogrande (5-shot)              |81.06|
+ |GSM8k (5-shot)                   |71.19|
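
For reference, the leaderboard's "Avg." row is the unweighted mean of the six headline benchmarks, which come from the harness metrics removed in this commit (ARC `acc_norm`, HellaSwag `acc_norm`, the mean over the `hendrycksTest` subtasks for MMLU, TruthfulQA `mc2`, Winogrande `acc`, GSM8k `acc`). A minimal sketch reproducing the average from the table values:

```python
# Recompute the leaderboard "Avg." from the six per-benchmark scores in the table above.
scores = {
    "AI2 Reasoning Challenge (25-Shot)": 69.45,  # harness|arc:challenge|25, acc_norm
    "HellaSwag (10-Shot)": 86.75,                # harness|hellaswag|10, acc_norm
    "MMLU (5-Shot)": 65.29,                      # mean over the harness|hendrycksTest-* subtasks
    "TruthfulQA (0-shot)": 61.33,                # harness|truthfulqa:mc|0, mc2
    "Winogrande (5-shot)": 81.06,                # harness|winogrande|5, acc
    "GSM8k (5-shot)": 71.19,                     # harness|gsm8k|5, acc
}
average = sum(scores.values()) / len(scores)
print(f"Avg. = {average:.2f}")  # 72.51, matching the first row of the table
```

The committed usage snippet passes `load_in_4bit` through `model_kwargs`; an equivalent sketch using an explicit `BitsAndBytesConfig` is shown below. It is not part of the commit, and the full Hub id `mychen76/openmixtral-4x7b-merged` is an assumption inferred from the leaderboard dataset name (the committed snippet uses `"openmixtral-4x7b-merged"`).

```python
# Sketch only (not part of the commit): 4-bit loading with an explicit
# BitsAndBytesConfig instead of model_kwargs={"load_in_4bit": True}.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mychen76/openmixtral-4x7b-merged"  # assumed full Hub id

quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",
)

messages = [{"role": "user", "content": "Why is the sky blue?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```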