shubhrapandit committed
Commit ab97422
1 Parent(s): 7598c1c

Update model files

arc_challenge.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "results": {
+     "arc_challenge": {
+       "acc": 0.4351535836177474,
+       "acc_stderr": 0.014487986197186047,
+       "acc_norm": 0.46757679180887374,
+       "acc_norm_stderr": 0.014580637569995421
+     }
+   },
+   "versions": {
+     "arc_challenge": 0
+   },
+   "config": {
+     "model": "sparseml",
+     "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+     "num_fewshot": 25,
+     "batch_size": "16",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "no_cache": true,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "description_dict": {}
+   }
+ }
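Note: this file (and the gsm8k, hellaswag, mmlu, truthfulqa_mc, and winogrande files added below) follows the results/versions/config JSON layout written by EleutherAI's lm-evaluation-harness v0.3. A minimal sketch of how such a file could be regenerated is shown here; it assumes the SparseML fork of the harness, which registers the "sparseml" model type seen in the config block (upstream lm-eval does not), and uses a placeholder model path.

```python
# Hedged sketch: reproduce arc_challenge.json via lm-eval-harness v0.3's Python API.
# The "sparseml" model type is assumed to come from the SparseML fork of the harness;
# "<local_model_path>" is a placeholder for a local checkout of this repository.
import json

from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="sparseml",
    model_args="pretrained=<local_model_path>,trust_remote_code=True,dtype=bfloat16",
    tasks=["arc_challenge"],
    num_fewshot=25,      # 5 for gsm8k/winogrande/mmlu, 10 for hellaswag, 0 for truthfulqa_mc
    batch_size=16,       # mmlu was run with batch_size 8 per its config block
    device="cuda:0",
    no_cache=True,
    bootstrap_iters=100000,
)

# The returned dict contains the same "results", "versions", and "config" keys
# stored in the JSON files of this commit.
with open("arc_challenge.json", "w") as f:
    json.dump(results, f, indent=2)
```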
config.json CHANGED
@@ -1,4 +1,5 @@
  {
+   "_name_or_path": "/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4/combined/",
    "architectures": [
      "LlamaForCausalLM"
    ],
@@ -22,7 +23,7 @@
    "tie_word_embeddings": false,
    "tokenizer_class": "LlamaTokenizerFast",
    "torch_dtype": "float32",
-   "transformers_version": "1.7.0.43401",
+   "transformers_version": "4.39.3",
    "use_cache": true,
    "vocab_size": 32000
  }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.39.3"
+ }
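The new generation_config.json only pins the special-token ids and the transformers version. A minimal sketch of loading it with Hugging Face transformers ("<model_dir>" is a placeholder for a local checkout of this repository):

```python
# Load the generation defaults shipped in generation_config.json.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("<model_dir>")
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id)  # expected: 1 2
```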
gsm8k.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "results": {
+     "gsm8k": {
+       "acc": 0.15238817285822592,
+       "acc_stderr": 0.009899572254794198
+     }
+   },
+   "versions": {
+     "gsm8k": 0
+   },
+   "config": {
+     "model": "sparseml",
+     "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+     "num_fewshot": 5,
+     "batch_size": "16",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "no_cache": true,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "description_dict": {}
+   }
+ }
hellaswag.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "results": {
+     "hellaswag": {
+       "acc": 0.5596494722166899,
+       "acc_stderr": 0.00495414628651335,
+       "acc_norm": 0.753734315873332,
+       "acc_norm_stderr": 0.004299546103761425
+     }
+   },
+   "versions": {
+     "hellaswag": 0
+   },
+   "config": {
+     "model": "sparseml",
+     "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+     "num_fewshot": 10,
+     "batch_size": "16",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "no_cache": true,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "description_dict": {}
+   }
+ }
mmlu.json ADDED
@@ -0,0 +1,417 @@
+ {
+   "results": {
+     "hendrycksTest-abstract_algebra": {
+       "acc": 0.28,
+       "acc_stderr": 0.04512608598542129,
+       "acc_norm": 0.28,
+       "acc_norm_stderr": 0.04512608598542129
+     },
+     "hendrycksTest-anatomy": {
+       "acc": 0.42962962962962964,
+       "acc_stderr": 0.04276349494376599,
+       "acc_norm": 0.42962962962962964,
+       "acc_norm_stderr": 0.04276349494376599
+     },
+     "hendrycksTest-astronomy": {
+       "acc": 0.47368421052631576,
+       "acc_stderr": 0.04063302731486671,
+       "acc_norm": 0.47368421052631576,
+       "acc_norm_stderr": 0.04063302731486671
+     },
+     "hendrycksTest-business_ethics": {
+       "acc": 0.46,
+       "acc_stderr": 0.05009082659620332,
+       "acc_norm": 0.46,
+       "acc_norm_stderr": 0.05009082659620332
+     },
+     "hendrycksTest-clinical_knowledge": {
+       "acc": 0.47547169811320755,
+       "acc_stderr": 0.030735822206205615,
+       "acc_norm": 0.47547169811320755,
+       "acc_norm_stderr": 0.030735822206205615
+     },
+     "hendrycksTest-college_biology": {
+       "acc": 0.375,
+       "acc_stderr": 0.04048439222695598,
+       "acc_norm": 0.375,
+       "acc_norm_stderr": 0.04048439222695598
+     },
+     "hendrycksTest-college_chemistry": {
+       "acc": 0.42,
+       "acc_stderr": 0.04960449637488584,
+       "acc_norm": 0.42,
+       "acc_norm_stderr": 0.04960449637488584
+     },
+     "hendrycksTest-college_computer_science": {
+       "acc": 0.36,
+       "acc_stderr": 0.048241815132442176,
+       "acc_norm": 0.36,
+       "acc_norm_stderr": 0.048241815132442176
+     },
+     "hendrycksTest-college_mathematics": {
+       "acc": 0.26,
+       "acc_stderr": 0.044084400227680794,
+       "acc_norm": 0.26,
+       "acc_norm_stderr": 0.044084400227680794
+     },
+     "hendrycksTest-college_medicine": {
+       "acc": 0.4046242774566474,
+       "acc_stderr": 0.03742461193887248,
+       "acc_norm": 0.4046242774566474,
+       "acc_norm_stderr": 0.03742461193887248
+     },
+     "hendrycksTest-college_physics": {
+       "acc": 0.21568627450980393,
+       "acc_stderr": 0.04092563958237654,
+       "acc_norm": 0.21568627450980393,
+       "acc_norm_stderr": 0.04092563958237654
+     },
+     "hendrycksTest-computer_security": {
+       "acc": 0.53,
+       "acc_stderr": 0.050161355804659205,
+       "acc_norm": 0.53,
+       "acc_norm_stderr": 0.050161355804659205
+     },
+     "hendrycksTest-conceptual_physics": {
+       "acc": 0.34893617021276596,
+       "acc_stderr": 0.031158522131357787,
+       "acc_norm": 0.34893617021276596,
+       "acc_norm_stderr": 0.031158522131357787
+     },
+     "hendrycksTest-econometrics": {
+       "acc": 0.2807017543859649,
+       "acc_stderr": 0.042270544512322,
+       "acc_norm": 0.2807017543859649,
+       "acc_norm_stderr": 0.042270544512322
+     },
+     "hendrycksTest-electrical_engineering": {
+       "acc": 0.4482758620689655,
+       "acc_stderr": 0.04144311810878152,
+       "acc_norm": 0.4482758620689655,
+       "acc_norm_stderr": 0.04144311810878152
+     },
+     "hendrycksTest-elementary_mathematics": {
+       "acc": 0.291005291005291,
+       "acc_stderr": 0.02339382650048487,
+       "acc_norm": 0.291005291005291,
+       "acc_norm_stderr": 0.02339382650048487
+     },
+     "hendrycksTest-formal_logic": {
+       "acc": 0.23015873015873015,
+       "acc_stderr": 0.037649508797906045,
+       "acc_norm": 0.23015873015873015,
+       "acc_norm_stderr": 0.037649508797906045
+     },
+     "hendrycksTest-global_facts": {
+       "acc": 0.38,
+       "acc_stderr": 0.048783173121456316,
+       "acc_norm": 0.38,
+       "acc_norm_stderr": 0.048783173121456316
+     },
+     "hendrycksTest-high_school_biology": {
+       "acc": 0.432258064516129,
+       "acc_stderr": 0.028181739720019413,
+       "acc_norm": 0.432258064516129,
+       "acc_norm_stderr": 0.028181739720019413
+     },
+     "hendrycksTest-high_school_chemistry": {
+       "acc": 0.3054187192118227,
+       "acc_stderr": 0.03240661565868408,
+       "acc_norm": 0.3054187192118227,
+       "acc_norm_stderr": 0.03240661565868408
+     },
+     "hendrycksTest-high_school_computer_science": {
+       "acc": 0.43,
+       "acc_stderr": 0.04975698519562428,
+       "acc_norm": 0.43,
+       "acc_norm_stderr": 0.04975698519562428
+     },
+     "hendrycksTest-high_school_european_history": {
+       "acc": 0.5757575757575758,
+       "acc_stderr": 0.03859268142070265,
+       "acc_norm": 0.5757575757575758,
+       "acc_norm_stderr": 0.03859268142070265
+     },
+     "hendrycksTest-high_school_geography": {
+       "acc": 0.4797979797979798,
+       "acc_stderr": 0.035594435655639196,
+       "acc_norm": 0.4797979797979798,
+       "acc_norm_stderr": 0.035594435655639196
+     },
+     "hendrycksTest-high_school_government_and_politics": {
+       "acc": 0.6010362694300518,
+       "acc_stderr": 0.03533999094065696,
+       "acc_norm": 0.6010362694300518,
+       "acc_norm_stderr": 0.03533999094065696
+     },
+     "hendrycksTest-high_school_macroeconomics": {
+       "acc": 0.4128205128205128,
+       "acc_stderr": 0.024962683564331803,
+       "acc_norm": 0.4128205128205128,
+       "acc_norm_stderr": 0.024962683564331803
+     },
+     "hendrycksTest-high_school_mathematics": {
+       "acc": 0.26666666666666666,
+       "acc_stderr": 0.02696242432507384,
+       "acc_norm": 0.26666666666666666,
+       "acc_norm_stderr": 0.02696242432507384
+     },
+     "hendrycksTest-high_school_microeconomics": {
+       "acc": 0.40756302521008403,
+       "acc_stderr": 0.03191863374478465,
+       "acc_norm": 0.40756302521008403,
+       "acc_norm_stderr": 0.03191863374478465
+     },
+     "hendrycksTest-high_school_physics": {
+       "acc": 0.33112582781456956,
+       "acc_stderr": 0.038425817186598696,
+       "acc_norm": 0.33112582781456956,
+       "acc_norm_stderr": 0.038425817186598696
+     },
+     "hendrycksTest-high_school_psychology": {
+       "acc": 0.5082568807339449,
+       "acc_stderr": 0.021434399918214327,
+       "acc_norm": 0.5082568807339449,
+       "acc_norm_stderr": 0.021434399918214327
+     },
+     "hendrycksTest-high_school_statistics": {
+       "acc": 0.32407407407407407,
+       "acc_stderr": 0.03191923445686186,
+       "acc_norm": 0.32407407407407407,
+       "acc_norm_stderr": 0.03191923445686186
+     },
+     "hendrycksTest-high_school_us_history": {
+       "acc": 0.5588235294117647,
+       "acc_stderr": 0.034849415144292316,
+       "acc_norm": 0.5588235294117647,
+       "acc_norm_stderr": 0.034849415144292316
+     },
+     "hendrycksTest-high_school_world_history": {
+       "acc": 0.6455696202531646,
+       "acc_stderr": 0.031137304297185805,
+       "acc_norm": 0.6455696202531646,
+       "acc_norm_stderr": 0.031137304297185805
+     },
+     "hendrycksTest-human_aging": {
+       "acc": 0.4080717488789238,
+       "acc_stderr": 0.03298574607842822,
+       "acc_norm": 0.4080717488789238,
+       "acc_norm_stderr": 0.03298574607842822
+     },
+     "hendrycksTest-human_sexuality": {
+       "acc": 0.45038167938931295,
+       "acc_stderr": 0.04363643698524779,
+       "acc_norm": 0.45038167938931295,
+       "acc_norm_stderr": 0.04363643698524779
+     },
+     "hendrycksTest-international_law": {
+       "acc": 0.5867768595041323,
+       "acc_stderr": 0.04495087843548408,
+       "acc_norm": 0.5867768595041323,
+       "acc_norm_stderr": 0.04495087843548408
+     },
+     "hendrycksTest-jurisprudence": {
+       "acc": 0.42592592592592593,
+       "acc_stderr": 0.0478034362693679,
+       "acc_norm": 0.42592592592592593,
+       "acc_norm_stderr": 0.0478034362693679
+     },
+     "hendrycksTest-logical_fallacies": {
+       "acc": 0.4294478527607362,
+       "acc_stderr": 0.03889066619112722,
+       "acc_norm": 0.4294478527607362,
+       "acc_norm_stderr": 0.03889066619112722
+     },
+     "hendrycksTest-machine_learning": {
+       "acc": 0.25892857142857145,
+       "acc_stderr": 0.04157751539865629,
+       "acc_norm": 0.25892857142857145,
+       "acc_norm_stderr": 0.04157751539865629
+     },
+     "hendrycksTest-management": {
+       "acc": 0.5631067961165048,
+       "acc_stderr": 0.04911147107365777,
+       "acc_norm": 0.5631067961165048,
+       "acc_norm_stderr": 0.04911147107365777
+     },
+     "hendrycksTest-marketing": {
+       "acc": 0.5470085470085471,
+       "acc_stderr": 0.03261099873098618,
+       "acc_norm": 0.5470085470085471,
+       "acc_norm_stderr": 0.03261099873098618
+     },
+     "hendrycksTest-medical_genetics": {
+       "acc": 0.38,
+       "acc_stderr": 0.04878317312145633,
+       "acc_norm": 0.38,
+       "acc_norm_stderr": 0.04878317312145633
+     },
+     "hendrycksTest-miscellaneous": {
+       "acc": 0.5696040868454662,
+       "acc_stderr": 0.01770586877629239,
+       "acc_norm": 0.5696040868454662,
+       "acc_norm_stderr": 0.01770586877629239
+     },
+     "hendrycksTest-moral_disputes": {
+       "acc": 0.43641618497109824,
+       "acc_stderr": 0.02670054542494368,
+       "acc_norm": 0.43641618497109824,
+       "acc_norm_stderr": 0.02670054542494368
+     },
+     "hendrycksTest-moral_scenarios": {
+       "acc": 0.2581005586592179,
+       "acc_stderr": 0.014635185616527836,
+       "acc_norm": 0.2581005586592179,
+       "acc_norm_stderr": 0.014635185616527836
+     },
+     "hendrycksTest-nutrition": {
+       "acc": 0.5065359477124183,
+       "acc_stderr": 0.028627470550556054,
+       "acc_norm": 0.5065359477124183,
+       "acc_norm_stderr": 0.028627470550556054
+     },
+     "hendrycksTest-philosophy": {
+       "acc": 0.4887459807073955,
+       "acc_stderr": 0.028390897396863533,
+       "acc_norm": 0.4887459807073955,
+       "acc_norm_stderr": 0.028390897396863533
+     },
+     "hendrycksTest-prehistory": {
+       "acc": 0.47530864197530864,
+       "acc_stderr": 0.027786800931427436,
+       "acc_norm": 0.47530864197530864,
+       "acc_norm_stderr": 0.027786800931427436
+     },
+     "hendrycksTest-professional_accounting": {
+       "acc": 0.3333333333333333,
+       "acc_stderr": 0.028121636040639893,
+       "acc_norm": 0.3333333333333333,
+       "acc_norm_stderr": 0.028121636040639893
+     },
+     "hendrycksTest-professional_law": {
+       "acc": 0.333116036505867,
+       "acc_stderr": 0.012037930451512052,
+       "acc_norm": 0.333116036505867,
+       "acc_norm_stderr": 0.012037930451512052
+     },
+     "hendrycksTest-professional_medicine": {
+       "acc": 0.3492647058823529,
+       "acc_stderr": 0.028959755196824852,
+       "acc_norm": 0.3492647058823529,
+       "acc_norm_stderr": 0.028959755196824852
+     },
+     "hendrycksTest-professional_psychology": {
+       "acc": 0.4068627450980392,
+       "acc_stderr": 0.019873802005061177,
+       "acc_norm": 0.4068627450980392,
+       "acc_norm_stderr": 0.019873802005061177
+     },
+     "hendrycksTest-public_relations": {
+       "acc": 0.4818181818181818,
+       "acc_stderr": 0.04785964010794916,
+       "acc_norm": 0.4818181818181818,
+       "acc_norm_stderr": 0.04785964010794916
+     },
+     "hendrycksTest-security_studies": {
+       "acc": 0.4775510204081633,
+       "acc_stderr": 0.03197694118713672,
+       "acc_norm": 0.4775510204081633,
+       "acc_norm_stderr": 0.03197694118713672
+     },
+     "hendrycksTest-sociology": {
+       "acc": 0.5771144278606966,
+       "acc_stderr": 0.034932317774212816,
+       "acc_norm": 0.5771144278606966,
+       "acc_norm_stderr": 0.034932317774212816
+     },
+     "hendrycksTest-us_foreign_policy": {
+       "acc": 0.64,
+       "acc_stderr": 0.048241815132442176,
+       "acc_norm": 0.64,
+       "acc_norm_stderr": 0.048241815132442176
+     },
+     "hendrycksTest-virology": {
+       "acc": 0.42168674698795183,
+       "acc_stderr": 0.03844453181770917,
+       "acc_norm": 0.42168674698795183,
+       "acc_norm_stderr": 0.03844453181770917
+     },
+     "hendrycksTest-world_religions": {
+       "acc": 0.5847953216374269,
+       "acc_stderr": 0.03779275945503201,
+       "acc_norm": 0.5847953216374269,
+       "acc_norm_stderr": 0.03779275945503201
+     }
+   },
+   "versions": {
+     "hendrycksTest-abstract_algebra": 1,
+     "hendrycksTest-anatomy": 1,
+     "hendrycksTest-astronomy": 1,
+     "hendrycksTest-business_ethics": 1,
+     "hendrycksTest-clinical_knowledge": 1,
+     "hendrycksTest-college_biology": 1,
+     "hendrycksTest-college_chemistry": 1,
+     "hendrycksTest-college_computer_science": 1,
+     "hendrycksTest-college_mathematics": 1,
+     "hendrycksTest-college_medicine": 1,
+     "hendrycksTest-college_physics": 1,
+     "hendrycksTest-computer_security": 1,
+     "hendrycksTest-conceptual_physics": 1,
+     "hendrycksTest-econometrics": 1,
+     "hendrycksTest-electrical_engineering": 1,
+     "hendrycksTest-elementary_mathematics": 1,
+     "hendrycksTest-formal_logic": 1,
+     "hendrycksTest-global_facts": 1,
+     "hendrycksTest-high_school_biology": 1,
+     "hendrycksTest-high_school_chemistry": 1,
+     "hendrycksTest-high_school_computer_science": 1,
+     "hendrycksTest-high_school_european_history": 1,
+     "hendrycksTest-high_school_geography": 1,
+     "hendrycksTest-high_school_government_and_politics": 1,
+     "hendrycksTest-high_school_macroeconomics": 1,
+     "hendrycksTest-high_school_mathematics": 1,
+     "hendrycksTest-high_school_microeconomics": 1,
+     "hendrycksTest-high_school_physics": 1,
+     "hendrycksTest-high_school_psychology": 1,
+     "hendrycksTest-high_school_statistics": 1,
+     "hendrycksTest-high_school_us_history": 1,
+     "hendrycksTest-high_school_world_history": 1,
+     "hendrycksTest-human_aging": 1,
+     "hendrycksTest-human_sexuality": 1,
+     "hendrycksTest-international_law": 1,
+     "hendrycksTest-jurisprudence": 1,
+     "hendrycksTest-logical_fallacies": 1,
+     "hendrycksTest-machine_learning": 1,
+     "hendrycksTest-management": 1,
+     "hendrycksTest-marketing": 1,
+     "hendrycksTest-medical_genetics": 1,
+     "hendrycksTest-miscellaneous": 1,
+     "hendrycksTest-moral_disputes": 1,
+     "hendrycksTest-moral_scenarios": 1,
+     "hendrycksTest-nutrition": 1,
+     "hendrycksTest-philosophy": 1,
+     "hendrycksTest-prehistory": 1,
+     "hendrycksTest-professional_accounting": 1,
+     "hendrycksTest-professional_law": 1,
+     "hendrycksTest-professional_medicine": 1,
+     "hendrycksTest-professional_psychology": 1,
+     "hendrycksTest-public_relations": 1,
+     "hendrycksTest-security_studies": 1,
+     "hendrycksTest-sociology": 1,
+     "hendrycksTest-us_foreign_policy": 1,
+     "hendrycksTest-virology": 1,
+     "hendrycksTest-world_religions": 1
+   },
+   "config": {
+     "model": "sparseml",
+     "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+     "num_fewshot": 5,
+     "batch_size": "8",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "no_cache": true,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "description_dict": {}
+   }
+ }
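mmlu.json stores only per-subtask scores; the harness does not write an aggregate MMLU number. A short sketch of one common convention, an unweighted macro-average over the 57 hendrycksTest-* accuracies in this file (other reports weight subtasks by example count):

```python
# Compute an unweighted macro-average over the hendrycksTest-* subtask accuracies.
import json

with open("mmlu.json") as f:
    data = json.load(f)

accs = [v["acc"] for k, v in data["results"].items() if k.startswith("hendrycksTest-")]
print(f"MMLU macro-average acc over {len(accs)} subtasks: {sum(accs) / len(accs):.4f}")
```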
model-orig.onnx CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3e5e661b6d51ee379551157c22127ed36aecf7333389b43f9f7093d1fb498246
- size 1049663
+ oid sha256:82a5192b20dc2eaaa3f89e0333c688ec9d77549add91f4afbb21f22fbced447d
+ size 1047380
model.data CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:43758729d4bfddcc029b637aba79dcaebd358fd610b9d253bc27799c6a266a40
- size 7154772992
+ oid sha256:3aa6517696cbe012a0651daead0b5809e8b9856311b5f02cffc8f965344f6b15
+ size 7425272832
model.onnx CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:90e427d59cf0bf29dd98c99188e85d8c3f5c7f93c1ecda48868f00d147c94c81
- size 1034608
+ oid sha256:9091e1dce25260ddbe379fba87728d10d150636c173e099eebb7756ba9c24595
+ size 1032325
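model-orig.onnx, model.data, and model.onnx are tracked with Git LFS, so the diffs above change only pointer files (version / oid / size). A small sketch of verifying a downloaded artifact against the sha256 recorded in its new pointer:

```python
# Verify a downloaded LFS artifact against the oid listed in its pointer file,
# e.g. the new model.data oid shown in this commit.
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "3aa6517696cbe012a0651daead0b5809e8b9856311b5f02cffc8f965344f6b15"
print(sha256_of("model.data") == expected)  # True if the download is intact
```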
recipe.yaml CHANGED
@@ -1,7 +1,7 @@
  test_stage:
    obcq_modifiers:
      SmoothQuantModifier:
-       smoothing_strength: 0.9
+       smoothing_strength: 0.8
        mappings:
        - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
          - re:.*input_layernorm
@@ -11,9 +11,9 @@ test_stage:
          - re:.*up_proj
      QuantizationModifier:
        ignore: [LlamaRotaryEmbedding, LlamaRMSNorm, SiLUActivation, model.layers.30.mlp.down_proj,
-         model.layers.1.mlp.down_proj, model.layers.0.mlp.down_proj, MatMulOutput_QK, MatMulOutput_PV,
-         MatMulLeftInput_QK, MatMulLeftInput_PV, MatMulRightInput_QK, MatMulRightInput_PV,
-         QuantizableMatMul]
+         model.layers.1.mlp.down_proj, model.layers.0.mlp.down_proj, model.layers.4.mlp.down_proj,
+         model.layers.8.mlp.down_proj, MatMulOutput_QK, MatMulOutput_PV, MatMulLeftInput_QK,
+         MatMulLeftInput_PV, MatMulRightInput_QK, MatMulRightInput_PV, QuantizableMatMul]
        post_oneshot_calibration: true
        scheme_overrides:
          Linear:
@@ -35,5 +35,5 @@ test_stage:
          model.layers.16, model.layers.17, model.layers.18, model.layers.19, model.layers.20,
          model.layers.21, model.layers.22, model.layers.23, model.layers.24, model.layers.25,
          model.layers.26, model.layers.27, model.layers.28, model.layers.29, model.layers.30,
-         model.layers.31]
+         model.layers.31, lm_head]
        target_ids: [attention_mask, position_ids]
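This recipe change lowers the SmoothQuant smoothing_strength from 0.9 to 0.8, adds the layer-4 and layer-8 down_proj modules to the quantization ignore list, and appends lm_head to the final target list. A small sketch of inspecting the touched fields, assuming the recipe parses as plain YAML with the nesting shown above:

```python
# Print the recipe fields touched by this commit (assumes plain-YAML structure
# test_stage -> obcq_modifiers -> {SmoothQuantModifier, QuantizationModifier}).
import yaml

with open("recipe.yaml") as f:
    recipe = yaml.safe_load(f)

mods = recipe["test_stage"]["obcq_modifiers"]
print(mods["SmoothQuantModifier"]["smoothing_strength"])  # 0.8 after this commit
print(mods["QuantizationModifier"]["ignore"])             # now also skips layers 4 and 8 down_proj
```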
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
  {
    "version": "1.0",
-   "truncation": {
-     "direction": "Right",
-     "max_length": 4096,
-     "strategy": "LongestFirst",
-     "stride": 0
-   },
+   "truncation": null,
    "padding": null,
    "added_tokens": [
      {
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
  {
+   "add_bos_token": true,
+   "add_eos_token": false,
    "added_tokens_decoder": {
      "0": {
        "content": "<unk>",
truthfulqa_mc.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "results": {
+     "truthfulqa_mc": {
+       "mc1": 0.2729498164014688,
+       "mc1_stderr": 0.015594753632006518,
+       "mc2": 0.4190461960683527,
+       "mc2_stderr": 0.01451417258125535
+     }
+   },
+   "versions": {
+     "truthfulqa_mc": 1
+   },
+   "config": {
+     "model": "sparseml",
+     "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+     "num_fewshot": 0,
+     "batch_size": "16",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "no_cache": true,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "description_dict": {}
+   }
+ }
winogrande.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "results": {
+     "winogrande": {
+       "acc": 0.6866614048934491,
+       "acc_stderr": 0.013036512096747983
+     }
+   },
+   "versions": {
+     "winogrande": 0
+   },
+   "config": {
+     "model": "sparseml",
+     "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+     "num_fewshot": 5,
+     "batch_size": "16",
+     "batch_sizes": [],
+     "device": "cuda:0",
+     "no_cache": true,
+     "limit": null,
+     "bootstrap_iters": 100000,
+     "description_dict": {}
+   }
+ }