mychen76 commited on
Commit
a6792bc
1 Parent(s): 9f5bd0d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +31 -386
README.md CHANGED
@@ -19,7 +19,6 @@ mistral-7b-merged-dare is a merge of the following models:
19
  ```yaml
20
  models:
21
  - model: mistralai/Mistral-7B-v0.1
22
- # No parameters necessary for base model
23
  - model: samir-fama/SamirGPT-v1
24
  parameters:
25
  density: 0.53
@@ -40,392 +39,38 @@ dtype: bfloat16
40
 
41
  ```
42
 
43
- ### Evaluation Details
44
- https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-dare
 
45
 
46
- ### Result:
47
- https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-dare/blob/main/results_2024-03-10T11-06-23.658904.json
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  ```
50
- {
51
- "all": {
52
- "acc": 0.6555984630611819,
53
- "acc_stderr": 0.03202413494937558,
54
- "acc_norm": 0.6552172318444804,
55
- "acc_norm_stderr": 0.03269035140382117,
56
- "mc1": 0.46878824969400246,
57
- "mc1_stderr": 0.017469364874577537,
58
- "mc2": 0.6324196158065736,
59
- "mc2_stderr": 0.015183642172146008
60
- },
61
- "harness|arc:challenge|25": {
62
- "acc": 0.6655290102389079,
63
- "acc_stderr": 0.013787460322441374,
64
- "acc_norm": 0.697098976109215,
65
- "acc_norm_stderr": 0.013428241573185349
66
- },
67
- "harness|hellaswag|10": {
68
- "acc": 0.6896036646086438,
69
- "acc_stderr": 0.004617103280372031,
70
- "acc_norm": 0.8705437163911571,
71
- "acc_norm_stderr": 0.003350181812941611
72
- },
73
- "harness|hendrycksTest-abstract_algebra|5": {
74
- "acc": 0.36,
75
- "acc_stderr": 0.04824181513244218,
76
- "acc_norm": 0.36,
77
- "acc_norm_stderr": 0.04824181513244218
78
- },
79
- "harness|hendrycksTest-anatomy|5": {
80
- "acc": 0.6296296296296297,
81
- "acc_stderr": 0.041716541613545426,
82
- "acc_norm": 0.6296296296296297,
83
- "acc_norm_stderr": 0.041716541613545426
84
- },
85
- "harness|hendrycksTest-astronomy|5": {
86
- "acc": 0.6842105263157895,
87
- "acc_stderr": 0.0378272898086547,
88
- "acc_norm": 0.6842105263157895,
89
- "acc_norm_stderr": 0.0378272898086547
90
- },
91
- "harness|hendrycksTest-business_ethics|5": {
92
- "acc": 0.64,
93
- "acc_stderr": 0.04824181513244218,
94
- "acc_norm": 0.64,
95
- "acc_norm_stderr": 0.04824181513244218
96
- },
97
- "harness|hendrycksTest-clinical_knowledge|5": {
98
- "acc": 0.7245283018867924,
99
- "acc_stderr": 0.027495663683724057,
100
- "acc_norm": 0.7245283018867924,
101
- "acc_norm_stderr": 0.027495663683724057
102
- },
103
- "harness|hendrycksTest-college_biology|5": {
104
- "acc": 0.7708333333333334,
105
- "acc_stderr": 0.03514697467862388,
106
- "acc_norm": 0.7708333333333334,
107
- "acc_norm_stderr": 0.03514697467862388
108
- },
109
- "harness|hendrycksTest-college_chemistry|5": {
110
- "acc": 0.48,
111
- "acc_stderr": 0.050211673156867795,
112
- "acc_norm": 0.48,
113
- "acc_norm_stderr": 0.050211673156867795
114
- },
115
- "harness|hendrycksTest-college_computer_science|5": {
116
- "acc": 0.55,
117
- "acc_stderr": 0.05,
118
- "acc_norm": 0.55,
119
- "acc_norm_stderr": 0.05
120
- },
121
- "harness|hendrycksTest-college_mathematics|5": {
122
- "acc": 0.33,
123
- "acc_stderr": 0.04725815626252604,
124
- "acc_norm": 0.33,
125
- "acc_norm_stderr": 0.04725815626252604
126
- },
127
- "harness|hendrycksTest-college_medicine|5": {
128
- "acc": 0.653179190751445,
129
- "acc_stderr": 0.036291466701596636,
130
- "acc_norm": 0.653179190751445,
131
- "acc_norm_stderr": 0.036291466701596636
132
- },
133
- "harness|hendrycksTest-college_physics|5": {
134
- "acc": 0.4411764705882353,
135
- "acc_stderr": 0.049406356306056595,
136
- "acc_norm": 0.4411764705882353,
137
- "acc_norm_stderr": 0.049406356306056595
138
- },
139
- "harness|hendrycksTest-computer_security|5": {
140
- "acc": 0.76,
141
- "acc_stderr": 0.04292346959909282,
142
- "acc_norm": 0.76,
143
- "acc_norm_stderr": 0.04292346959909282
144
- },
145
- "harness|hendrycksTest-conceptual_physics|5": {
146
- "acc": 0.6,
147
- "acc_stderr": 0.03202563076101735,
148
- "acc_norm": 0.6,
149
- "acc_norm_stderr": 0.03202563076101735
150
- },
151
- "harness|hendrycksTest-econometrics|5": {
152
- "acc": 0.47368421052631576,
153
- "acc_stderr": 0.046970851366478626,
154
- "acc_norm": 0.47368421052631576,
155
- "acc_norm_stderr": 0.046970851366478626
156
- },
157
- "harness|hendrycksTest-electrical_engineering|5": {
158
- "acc": 0.5655172413793104,
159
- "acc_stderr": 0.04130740879555498,
160
- "acc_norm": 0.5655172413793104,
161
- "acc_norm_stderr": 0.04130740879555498
162
- },
163
- "harness|hendrycksTest-elementary_mathematics|5": {
164
- "acc": 0.42063492063492064,
165
- "acc_stderr": 0.025424835086924,
166
- "acc_norm": 0.42063492063492064,
167
- "acc_norm_stderr": 0.025424835086924
168
- },
169
- "harness|hendrycksTest-formal_logic|5": {
170
- "acc": 0.47619047619047616,
171
- "acc_stderr": 0.04467062628403273,
172
- "acc_norm": 0.47619047619047616,
173
- "acc_norm_stderr": 0.04467062628403273
174
- },
175
- "harness|hendrycksTest-global_facts|5": {
176
- "acc": 0.36,
177
- "acc_stderr": 0.048241815132442176,
178
- "acc_norm": 0.36,
179
- "acc_norm_stderr": 0.048241815132442176
180
- },
181
- "harness|hendrycksTest-high_school_biology|5": {
182
- "acc": 0.7774193548387097,
183
- "acc_stderr": 0.023664216671642518,
184
- "acc_norm": 0.7774193548387097,
185
- "acc_norm_stderr": 0.023664216671642518
186
- },
187
- "harness|hendrycksTest-high_school_chemistry|5": {
188
- "acc": 0.49261083743842365,
189
- "acc_stderr": 0.035176035403610084,
190
- "acc_norm": 0.49261083743842365,
191
- "acc_norm_stderr": 0.035176035403610084
192
- },
193
- "harness|hendrycksTest-high_school_computer_science|5": {
194
- "acc": 0.72,
195
- "acc_stderr": 0.04512608598542127,
196
- "acc_norm": 0.72,
197
- "acc_norm_stderr": 0.04512608598542127
198
- },
199
- "harness|hendrycksTest-high_school_european_history|5": {
200
- "acc": 0.7757575757575758,
201
- "acc_stderr": 0.03256866661681102,
202
- "acc_norm": 0.7757575757575758,
203
- "acc_norm_stderr": 0.03256866661681102
204
- },
205
- "harness|hendrycksTest-high_school_geography|5": {
206
- "acc": 0.7878787878787878,
207
- "acc_stderr": 0.02912652283458682,
208
- "acc_norm": 0.7878787878787878,
209
- "acc_norm_stderr": 0.02912652283458682
210
- },
211
- "harness|hendrycksTest-high_school_government_and_politics|5": {
212
- "acc": 0.9067357512953368,
213
- "acc_stderr": 0.020986854593289733,
214
- "acc_norm": 0.9067357512953368,
215
- "acc_norm_stderr": 0.020986854593289733
216
- },
217
- "harness|hendrycksTest-high_school_macroeconomics|5": {
218
- "acc": 0.6743589743589744,
219
- "acc_stderr": 0.02375966576741229,
220
- "acc_norm": 0.6743589743589744,
221
- "acc_norm_stderr": 0.02375966576741229
222
- },
223
- "harness|hendrycksTest-high_school_mathematics|5": {
224
- "acc": 0.32222222222222224,
225
- "acc_stderr": 0.028493465091028593,
226
- "acc_norm": 0.32222222222222224,
227
- "acc_norm_stderr": 0.028493465091028593
228
- },
229
- "harness|hendrycksTest-high_school_microeconomics|5": {
230
- "acc": 0.6848739495798319,
231
- "acc_stderr": 0.030176808288974337,
232
- "acc_norm": 0.6848739495798319,
233
- "acc_norm_stderr": 0.030176808288974337
234
- },
235
- "harness|hendrycksTest-high_school_physics|5": {
236
- "acc": 0.3576158940397351,
237
- "acc_stderr": 0.03913453431177258,
238
- "acc_norm": 0.3576158940397351,
239
- "acc_norm_stderr": 0.03913453431177258
240
- },
241
- "harness|hendrycksTest-high_school_psychology|5": {
242
- "acc": 0.8477064220183487,
243
- "acc_stderr": 0.015405084393157074,
244
- "acc_norm": 0.8477064220183487,
245
- "acc_norm_stderr": 0.015405084393157074
246
- },
247
- "harness|hendrycksTest-high_school_statistics|5": {
248
- "acc": 0.5324074074074074,
249
- "acc_stderr": 0.03402801581358966,
250
- "acc_norm": 0.5324074074074074,
251
- "acc_norm_stderr": 0.03402801581358966
252
- },
253
- "harness|hendrycksTest-high_school_us_history|5": {
254
- "acc": 0.8529411764705882,
255
- "acc_stderr": 0.024857478080250454,
256
- "acc_norm": 0.8529411764705882,
257
- "acc_norm_stderr": 0.024857478080250454
258
- },
259
- "harness|hendrycksTest-high_school_world_history|5": {
260
- "acc": 0.8059071729957806,
261
- "acc_stderr": 0.025744902532290916,
262
- "acc_norm": 0.8059071729957806,
263
- "acc_norm_stderr": 0.025744902532290916
264
- },
265
- "harness|hendrycksTest-human_aging|5": {
266
- "acc": 0.6860986547085202,
267
- "acc_stderr": 0.031146796482972465,
268
- "acc_norm": 0.6860986547085202,
269
- "acc_norm_stderr": 0.031146796482972465
270
- },
271
- "harness|hendrycksTest-human_sexuality|5": {
272
- "acc": 0.8091603053435115,
273
- "acc_stderr": 0.03446513350752599,
274
- "acc_norm": 0.8091603053435115,
275
- "acc_norm_stderr": 0.03446513350752599
276
- },
277
- "harness|hendrycksTest-international_law|5": {
278
- "acc": 0.7851239669421488,
279
- "acc_stderr": 0.037494924487096966,
280
- "acc_norm": 0.7851239669421488,
281
- "acc_norm_stderr": 0.037494924487096966
282
- },
283
- "harness|hendrycksTest-jurisprudence|5": {
284
- "acc": 0.7870370370370371,
285
- "acc_stderr": 0.039578354719809805,
286
- "acc_norm": 0.7870370370370371,
287
- "acc_norm_stderr": 0.039578354719809805
288
- },
289
- "harness|hendrycksTest-logical_fallacies|5": {
290
- "acc": 0.7730061349693251,
291
- "acc_stderr": 0.03291099578615769,
292
- "acc_norm": 0.7730061349693251,
293
- "acc_norm_stderr": 0.03291099578615769
294
- },
295
- "harness|hendrycksTest-machine_learning|5": {
296
- "acc": 0.42857142857142855,
297
- "acc_stderr": 0.04697113923010212,
298
- "acc_norm": 0.42857142857142855,
299
- "acc_norm_stderr": 0.04697113923010212
300
- },
301
- "harness|hendrycksTest-management|5": {
302
- "acc": 0.7669902912621359,
303
- "acc_stderr": 0.04185832598928315,
304
- "acc_norm": 0.7669902912621359,
305
- "acc_norm_stderr": 0.04185832598928315
306
- },
307
- "harness|hendrycksTest-marketing|5": {
308
- "acc": 0.8803418803418803,
309
- "acc_stderr": 0.021262719400406957,
310
- "acc_norm": 0.8803418803418803,
311
- "acc_norm_stderr": 0.021262719400406957
312
- },
313
- "harness|hendrycksTest-medical_genetics|5": {
314
- "acc": 0.71,
315
- "acc_stderr": 0.045604802157206845,
316
- "acc_norm": 0.71,
317
- "acc_norm_stderr": 0.045604802157206845
318
- },
319
- "harness|hendrycksTest-miscellaneous|5": {
320
- "acc": 0.8352490421455939,
321
- "acc_stderr": 0.01326534626132379,
322
- "acc_norm": 0.8352490421455939,
323
- "acc_norm_stderr": 0.01326534626132379
324
- },
325
- "harness|hendrycksTest-moral_disputes|5": {
326
- "acc": 0.7456647398843931,
327
- "acc_stderr": 0.02344582627654554,
328
- "acc_norm": 0.7456647398843931,
329
- "acc_norm_stderr": 0.02344582627654554
330
- },
331
- "harness|hendrycksTest-moral_scenarios|5": {
332
- "acc": 0.42569832402234636,
333
- "acc_stderr": 0.016536829648997112,
334
- "acc_norm": 0.42569832402234636,
335
- "acc_norm_stderr": 0.016536829648997112
336
- },
337
- "harness|hendrycksTest-nutrition|5": {
338
- "acc": 0.738562091503268,
339
- "acc_stderr": 0.025160998214292452,
340
- "acc_norm": 0.738562091503268,
341
- "acc_norm_stderr": 0.025160998214292452
342
- },
343
- "harness|hendrycksTest-philosophy|5": {
344
- "acc": 0.7138263665594855,
345
- "acc_stderr": 0.02567025924218893,
346
- "acc_norm": 0.7138263665594855,
347
- "acc_norm_stderr": 0.02567025924218893
348
- },
349
- "harness|hendrycksTest-prehistory|5": {
350
- "acc": 0.7438271604938271,
351
- "acc_stderr": 0.0242885336377261,
352
- "acc_norm": 0.7438271604938271,
353
- "acc_norm_stderr": 0.0242885336377261
354
- },
355
- "harness|hendrycksTest-professional_accounting|5": {
356
- "acc": 0.4929078014184397,
357
- "acc_stderr": 0.02982449855912901,
358
- "acc_norm": 0.4929078014184397,
359
- "acc_norm_stderr": 0.02982449855912901
360
- },
361
- "harness|hendrycksTest-professional_law|5": {
362
- "acc": 0.46740547588005216,
363
- "acc_stderr": 0.012743072942653342,
364
- "acc_norm": 0.46740547588005216,
365
- "acc_norm_stderr": 0.012743072942653342
366
- },
367
- "harness|hendrycksTest-professional_medicine|5": {
368
- "acc": 0.6727941176470589,
369
- "acc_stderr": 0.028501452860396553,
370
- "acc_norm": 0.6727941176470589,
371
- "acc_norm_stderr": 0.028501452860396553
372
- },
373
- "harness|hendrycksTest-professional_psychology|5": {
374
- "acc": 0.6764705882352942,
375
- "acc_stderr": 0.018926082916083383,
376
- "acc_norm": 0.6764705882352942,
377
- "acc_norm_stderr": 0.018926082916083383
378
- },
379
- "harness|hendrycksTest-public_relations|5": {
380
- "acc": 0.6818181818181818,
381
- "acc_stderr": 0.044612721759105085,
382
- "acc_norm": 0.6818181818181818,
383
- "acc_norm_stderr": 0.044612721759105085
384
- },
385
- "harness|hendrycksTest-security_studies|5": {
386
- "acc": 0.726530612244898,
387
- "acc_stderr": 0.02853556033712844,
388
- "acc_norm": 0.726530612244898,
389
- "acc_norm_stderr": 0.02853556033712844
390
- },
391
- "harness|hendrycksTest-sociology|5": {
392
- "acc": 0.8407960199004975,
393
- "acc_stderr": 0.02587064676616913,
394
- "acc_norm": 0.8407960199004975,
395
- "acc_norm_stderr": 0.02587064676616913
396
- },
397
- "harness|hendrycksTest-us_foreign_policy|5": {
398
- "acc": 0.86,
399
- "acc_stderr": 0.0348735088019777,
400
- "acc_norm": 0.86,
401
- "acc_norm_stderr": 0.0348735088019777
402
- },
403
- "harness|hendrycksTest-virology|5": {
404
- "acc": 0.5421686746987951,
405
- "acc_stderr": 0.0387862677100236,
406
- "acc_norm": 0.5421686746987951,
407
- "acc_norm_stderr": 0.0387862677100236
408
- },
409
- "harness|hendrycksTest-world_religions|5": {
410
- "acc": 0.8421052631578947,
411
- "acc_stderr": 0.027966785859160893,
412
- "acc_norm": 0.8421052631578947,
413
- "acc_norm_stderr": 0.027966785859160893
414
- },
415
- "harness|truthfulqa:mc|0": {
416
- "mc1": 0.46878824969400246,
417
- "mc1_stderr": 0.017469364874577537,
418
- "mc2": 0.6324196158065736,
419
- "mc2_stderr": 0.015183642172146008
420
- },
421
- "harness|winogrande|5": {
422
- "acc": 0.8161010260457774,
423
- "acc_stderr": 0.010887916013305889
424
- },
425
- "harness|gsm8k|5": {
426
- "acc": 0.730098559514784,
427
- "acc_stderr": 0.012227442856468897
428
- }
429
- }
430
 
431
- ```
 
 
 
 
 
 
 
 
 
 
 
 
19
  ```yaml
20
  models:
21
  - model: mistralai/Mistral-7B-v0.1
 
22
  - model: samir-fama/SamirGPT-v1
23
  parameters:
24
  density: 0.53
 
39
 
40
  ```
41
 
42
+ ## 💻 Usage
43
+ ```python
44
+ !pip install -qU transformers bitsandbytes accelerate
45
 
46
+ from transformers import AutoTokenizer
47
+ import transformers
48
+ import torch
49
 
50
+ model = "mychen76/mistral-7b-merged-dare"
51
+
52
+ tokenizer = AutoTokenizer.from_pretrained(model)
53
+ pipeline = transformers.pipeline(
54
+ "text-generation",
55
+ model=model,
56
+ model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
57
+ )
58
+
59
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
60
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
61
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
62
+ print(outputs[0]["generated_text"])
63
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
66
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-dare)
67
+
68
+ | Metric |Value|
69
+ |---------------------------------|----:|
70
+ |Avg. |73.46|
71
+ |AI2 Reasoning Challenge (25-Shot)|69.71|
72
+ |HellaSwag (10-Shot) |87.05|
73
+ |MMLU (5-Shot) |65.07|
74
+ |TruthfulQA (0-shot) |63.24|
75
+ |Winogrande (5-shot) |81.61|
76
+ |GSM8k (5-shot) |73.01|