geonmin-kim commited on
Commit
7bed016
1 Parent(s): ebebfcc

Upload folder using huggingface_hub

Browse files
mlc-chat-config.json CHANGED
@@ -11,19 +11,21 @@
11
  "num_key_value_heads": 4,
12
  "rms_norm_eps": 1e-06,
13
  "rope_theta": 1000000.0,
14
- "vocab_size": 152064,
15
  "tie_word_embeddings": false,
16
- "context_window_size": 512,
17
- "prefill_chunk_size": 512,
18
  "tensor_parallel_shards": 1,
19
  "head_dim": 128,
20
  "dtype": "float32",
21
- "max_batch_size": 128
22
  },
23
- "vocab_size": 152064,
24
- "context_window_size": 512,
 
 
25
  "sliding_window_size": -1,
26
- "prefill_chunk_size": 512,
27
  "attention_sink_size": -1,
28
  "tensor_parallel_shards": 1,
29
  "pipeline_parallel_stages": 1,
@@ -45,13 +47,13 @@
45
  },
46
  "conv_template": {
47
  "name": "qwen2",
48
- "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
49
  "system_message": "You are a helpful assistant.",
50
  "system_prefix_token_ids": null,
51
  "add_role_after_system_message": true,
52
  "roles": {
53
- "user": "<|im_start|>user",
54
- "assistant": "<|im_start|>assistant"
55
  },
56
  "role_templates": {
57
  "user": "{user_message}",
@@ -60,25 +62,21 @@
60
  },
61
  "messages": [],
62
  "seps": [
63
- "<|im_end|>\n"
64
  ],
65
  "role_content_sep": "\n",
66
  "role_empty_sep": "\n",
67
  "stop_str": [
68
- "<|endoftext|>",
69
- "<|im_end|>"
70
  ],
71
  "stop_token_ids": [
72
- 151643,
73
- 151645
74
  ],
75
  "function_string": "",
76
  "use_function_calling": false
77
  },
78
- "pad_token_id": 151643,
79
- "bos_token_id": 151643,
80
- "eos_token_id": [
81
- 151645,
82
- 151643
83
- ]
84
  }
 
11
  "num_key_value_heads": 4,
12
  "rms_norm_eps": 1e-06,
13
  "rope_theta": 1000000.0,
14
+ "vocab_size": 136488,
15
  "tie_word_embeddings": false,
16
+ "context_window_size": 256,
17
+ "prefill_chunk_size": 128,
18
  "tensor_parallel_shards": 1,
19
  "head_dim": 128,
20
  "dtype": "float32",
21
+ "max_batch_size": 1
22
  },
23
+ "context_length": 512,
24
+ "max_tokens": 128,
25
+ "vocab_size": 136488,
26
+ "context_window_size": 256,
27
  "sliding_window_size": -1,
28
+ "prefill_chunk_size": 128,
29
  "attention_sink_size": -1,
30
  "tensor_parallel_shards": 1,
31
  "pipeline_parallel_stages": 1,
 
47
  },
48
  "conv_template": {
49
  "name": "qwen2",
50
+ "system_template": "system\n{system_message}\n",
51
  "system_message": "You are a helpful assistant.",
52
  "system_prefix_token_ids": null,
53
  "add_role_after_system_message": true,
54
  "roles": {
55
+ "user": "user",
56
+ "assistant": "assistant"
57
  },
58
  "role_templates": {
59
  "user": "{user_message}",
 
62
  },
63
  "messages": [],
64
  "seps": [
65
+ "\n"
66
  ],
67
  "role_content_sep": "\n",
68
  "role_empty_sep": "\n",
69
  "stop_str": [
70
+ "\n\n"
 
71
  ],
72
  "stop_token_ids": [
73
+ 136487,
74
+ 136485
75
  ],
76
  "function_string": "",
77
  "use_function_calling": false
78
  },
79
+ "pad_token_id": 136485,
80
+ "bos_token_id": 136485,
81
+ "eos_token_id": 136487
 
 
 
82
  }
ndarray-cache.json CHANGED
@@ -1,47 +1,47 @@
1
  {
2
  "metadata": {
3
  "ParamSize": 31,
4
- "ParamBytes": 4044459008.0,
5
  "BitsPerParam": 16.0
6
  },
7
  "records": [
8
  {
9
  "dataPath": "params_shard_0.bin",
10
  "format": "raw-shard",
11
- "nbytes": 1089994752,
12
  "records": [
13
  {
14
- "name": "model.embed_tokens.weight",
15
  "shape": [
16
- 152064,
17
  3584
18
  ],
19
  "dtype": "float16",
20
  "format": "f32-to-bf16",
21
- "nbytes": 1089994752,
22
  "byteOffset": 0
23
  }
24
  ],
25
- "md5sum": "04271b91cafad82f630343dcc1156d75"
26
  },
27
  {
28
  "dataPath": "params_shard_1.bin",
29
  "format": "raw-shard",
30
- "nbytes": 25690112,
31
  "records": [
32
  {
33
- "name": "model.layers.0.self_attn.o_proj.weight",
34
  "shape": [
35
  3584,
36
- 3584
37
  ],
38
  "dtype": "float16",
39
  "format": "f32-to-bf16",
40
- "nbytes": 25690112,
41
  "byteOffset": 0
42
  }
43
  ],
44
- "md5sum": "23278f4c2c2bd26e27c57bf98ad4ef8c"
45
  },
46
  {
47
  "dataPath": "params_shard_2.bin",
@@ -49,7 +49,7 @@
49
  "nbytes": 271581184,
50
  "records": [
51
  {
52
- "name": "model.layers.0.mlp.gate_up_proj.weight",
53
  "shape": [
54
  37888,
55
  3584
@@ -60,83 +60,83 @@
60
  "byteOffset": 0
61
  }
62
  ],
63
- "md5sum": "eab8149d005afce177602e3a0502e2d5"
64
  },
65
  {
66
  "dataPath": "params_shard_3.bin",
67
  "format": "raw-shard",
68
- "nbytes": 135790592,
69
  "records": [
70
  {
71
- "name": "model.layers.0.mlp.down_proj.weight",
72
  "shape": [
73
- 3584,
74
- 18944
75
  ],
76
  "dtype": "float16",
77
  "format": "f32-to-bf16",
78
- "nbytes": 135790592,
79
  "byteOffset": 0
80
  }
81
  ],
82
- "md5sum": "d8c5636ba8c54a1bf9a9be52028edd24"
83
  },
84
  {
85
  "dataPath": "params_shard_4.bin",
86
  "format": "raw-shard",
87
- "nbytes": 33030144,
88
  "records": [
89
  {
90
- "name": "model.layers.1.self_attn.c_attn.weight",
91
  "shape": [
92
- 4608,
93
- 3584
94
  ],
95
  "dtype": "float16",
96
  "format": "f32-to-bf16",
97
- "nbytes": 33030144,
98
  "byteOffset": 0
99
  }
100
  ],
101
- "md5sum": "c03154c2e7e28390bd1554a72a582034"
102
  },
103
  {
104
  "dataPath": "params_shard_5.bin",
105
  "format": "raw-shard",
106
- "nbytes": 25690112,
107
  "records": [
108
  {
109
- "name": "model.layers.1.self_attn.o_proj.weight",
110
  "shape": [
111
- 3584,
112
  3584
113
  ],
114
  "dtype": "float16",
115
  "format": "f32-to-bf16",
116
- "nbytes": 25690112,
117
  "byteOffset": 0
118
  }
119
  ],
120
- "md5sum": "d46398b81504bd2d82830108afdd8407"
121
  },
122
  {
123
  "dataPath": "params_shard_6.bin",
124
  "format": "raw-shard",
125
- "nbytes": 271581184,
126
  "records": [
127
  {
128
- "name": "model.layers.1.mlp.gate_up_proj.weight",
129
  "shape": [
130
- 37888,
131
  3584
132
  ],
133
  "dtype": "float16",
134
  "format": "f32-to-bf16",
135
- "nbytes": 271581184,
136
  "byteOffset": 0
137
  }
138
  ],
139
- "md5sum": "92a7bea5fcddf575091c43b1c4df714c"
140
  },
141
  {
142
  "dataPath": "params_shard_7.bin",
@@ -160,59 +160,59 @@
160
  {
161
  "dataPath": "params_shard_8.bin",
162
  "format": "raw-shard",
163
- "nbytes": 33030144,
164
  "records": [
165
  {
166
- "name": "model.layers.2.self_attn.c_attn.weight",
167
  "shape": [
168
- 4608,
169
  3584
170
  ],
171
  "dtype": "float16",
172
  "format": "f32-to-bf16",
173
- "nbytes": 33030144,
174
  "byteOffset": 0
175
  }
176
  ],
177
- "md5sum": "8af6d905b41911171901c0c9b1309afb"
178
  },
179
  {
180
  "dataPath": "params_shard_9.bin",
181
  "format": "raw-shard",
182
- "nbytes": 25690112,
183
  "records": [
184
  {
185
- "name": "model.layers.2.self_attn.o_proj.weight",
186
  "shape": [
187
- 3584,
188
  3584
189
  ],
190
  "dtype": "float16",
191
  "format": "f32-to-bf16",
192
- "nbytes": 25690112,
193
  "byteOffset": 0
194
  }
195
  ],
196
- "md5sum": "126e17f7b67d299092e34621d3d01ba5"
197
  },
198
  {
199
  "dataPath": "params_shard_10.bin",
200
  "format": "raw-shard",
201
- "nbytes": 271581184,
202
  "records": [
203
  {
204
- "name": "model.layers.2.mlp.gate_up_proj.weight",
205
  "shape": [
206
- 37888,
207
  3584
208
  ],
209
  "dtype": "float16",
210
  "format": "f32-to-bf16",
211
- "nbytes": 271581184,
212
  "byteOffset": 0
213
  }
214
  ],
215
- "md5sum": "56b5611e7e548cbe8da292871a122b53"
216
  },
217
  {
218
  "dataPath": "params_shard_11.bin",
@@ -236,97 +236,97 @@
236
  {
237
  "dataPath": "params_shard_12.bin",
238
  "format": "raw-shard",
239
- "nbytes": 33030144,
240
  "records": [
241
  {
242
- "name": "model.layers.3.self_attn.c_attn.weight",
243
  "shape": [
244
- 4608,
245
  3584
246
  ],
247
  "dtype": "float16",
248
  "format": "f32-to-bf16",
249
- "nbytes": 33030144,
250
  "byteOffset": 0
251
  }
252
  ],
253
- "md5sum": "10c68f4a89d8c9e5ac3fd00b5ce2ee47"
254
  },
255
  {
256
  "dataPath": "params_shard_13.bin",
257
  "format": "raw-shard",
258
- "nbytes": 25690112,
259
  "records": [
260
  {
261
- "name": "model.layers.3.self_attn.o_proj.weight",
262
  "shape": [
263
- 3584,
264
  3584
265
  ],
266
  "dtype": "float16",
267
  "format": "f32-to-bf16",
268
- "nbytes": 25690112,
269
  "byteOffset": 0
270
  }
271
  ],
272
- "md5sum": "e655468aacdcefbc5172993aea2634e2"
273
  },
274
  {
275
  "dataPath": "params_shard_14.bin",
276
  "format": "raw-shard",
277
- "nbytes": 271581184,
278
  "records": [
279
  {
280
- "name": "model.layers.3.mlp.gate_up_proj.weight",
281
  "shape": [
282
- 37888,
283
  3584
284
  ],
285
  "dtype": "float16",
286
  "format": "f32-to-bf16",
287
- "nbytes": 271581184,
288
  "byteOffset": 0
289
  }
290
  ],
291
- "md5sum": "ce9430ea30508f4c11646d9f6987c7d5"
292
  },
293
  {
294
  "dataPath": "params_shard_15.bin",
295
  "format": "raw-shard",
296
- "nbytes": 135790592,
297
  "records": [
298
  {
299
- "name": "model.layers.3.mlp.down_proj.weight",
300
  "shape": [
301
- 3584,
302
- 18944
303
  ],
304
  "dtype": "float16",
305
  "format": "f32-to-bf16",
306
- "nbytes": 135790592,
307
  "byteOffset": 0
308
  }
309
  ],
310
- "md5sum": "8463225b5d39d8aa6befc7e394ff9a80"
311
  },
312
  {
313
  "dataPath": "params_shard_16.bin",
314
  "format": "raw-shard",
315
- "nbytes": 1089994752,
316
  "records": [
317
  {
318
- "name": "lm_head.weight",
319
  "shape": [
320
- 152064,
321
  3584
322
  ],
323
  "dtype": "float16",
324
  "format": "f32-to-bf16",
325
- "nbytes": 1089994752,
326
  "byteOffset": 0
327
  }
328
  ],
329
- "md5sum": "edc0c39312d7ac18516c511b41a46f02"
330
  },
331
  {
332
  "dataPath": "params_shard_17.bin",
@@ -334,25 +334,34 @@
334
  "nbytes": 33131520,
335
  "records": [
336
  {
337
- "name": "model.layers.0.self_attn.c_attn.weight",
338
  "shape": [
339
- 4608,
340
  3584
341
  ],
342
  "dtype": "float16",
343
  "format": "f32-to-bf16",
344
- "nbytes": 33030144,
345
  "byteOffset": 0
346
  },
347
  {
348
- "name": "model.layers.0.self_attn.c_attn.bias",
349
  "shape": [
350
- 4608
351
  ],
352
  "dtype": "float16",
353
  "format": "f32-to-bf16",
354
- "nbytes": 9216,
355
- "byteOffset": 33030144
 
 
 
 
 
 
 
 
 
 
356
  },
357
  {
358
  "name": "model.layers.0.input_layernorm.weight",
@@ -362,7 +371,7 @@
362
  "dtype": "float16",
363
  "format": "f32-to-bf16",
364
  "nbytes": 7168,
365
- "byteOffset": 33039360
366
  },
367
  {
368
  "name": "model.layers.0.post_attention_layernorm.weight",
@@ -372,17 +381,28 @@
372
  "dtype": "float16",
373
  "format": "f32-to-bf16",
374
  "nbytes": 7168,
375
- "byteOffset": 33046528
376
  },
377
  {
378
- "name": "model.layers.1.self_attn.c_attn.bias",
379
  "shape": [
380
  4608
381
  ],
382
  "dtype": "float16",
383
  "format": "f32-to-bf16",
384
  "nbytes": 9216,
385
- "byteOffset": 33053696
 
 
 
 
 
 
 
 
 
 
 
386
  },
387
  {
388
  "name": "model.layers.1.input_layernorm.weight",
@@ -392,7 +412,7 @@
392
  "dtype": "float16",
393
  "format": "f32-to-bf16",
394
  "nbytes": 7168,
395
- "byteOffset": 33062912
396
  },
397
  {
398
  "name": "model.layers.1.post_attention_layernorm.weight",
@@ -402,17 +422,17 @@
402
  "dtype": "float16",
403
  "format": "f32-to-bf16",
404
  "nbytes": 7168,
405
- "byteOffset": 33070080
406
  },
407
  {
408
- "name": "model.layers.2.self_attn.c_attn.bias",
409
  "shape": [
410
  4608
411
  ],
412
  "dtype": "float16",
413
  "format": "f32-to-bf16",
414
  "nbytes": 9216,
415
- "byteOffset": 33077248
416
  },
417
  {
418
  "name": "model.layers.2.input_layernorm.weight",
@@ -422,7 +442,7 @@
422
  "dtype": "float16",
423
  "format": "f32-to-bf16",
424
  "nbytes": 7168,
425
- "byteOffset": 33086464
426
  },
427
  {
428
  "name": "model.layers.2.post_attention_layernorm.weight",
@@ -432,50 +452,30 @@
432
  "dtype": "float16",
433
  "format": "f32-to-bf16",
434
  "nbytes": 7168,
435
- "byteOffset": 33093632
436
  },
437
  {
438
- "name": "model.layers.3.self_attn.c_attn.bias",
439
  "shape": [
440
  4608
441
  ],
442
  "dtype": "float16",
443
  "format": "f32-to-bf16",
444
  "nbytes": 9216,
445
- "byteOffset": 33100800
446
  },
447
  {
448
- "name": "model.layers.3.input_layernorm.weight",
449
- "shape": [
450
- 3584
451
- ],
452
- "dtype": "float16",
453
- "format": "f32-to-bf16",
454
- "nbytes": 7168,
455
- "byteOffset": 33110016
456
- },
457
- {
458
- "name": "model.layers.3.post_attention_layernorm.weight",
459
- "shape": [
460
- 3584
461
- ],
462
- "dtype": "float16",
463
- "format": "f32-to-bf16",
464
- "nbytes": 7168,
465
- "byteOffset": 33117184
466
- },
467
- {
468
- "name": "model.norm.weight",
469
  "shape": [
470
- 3584
471
  ],
472
  "dtype": "float16",
473
  "format": "f32-to-bf16",
474
- "nbytes": 7168,
475
- "byteOffset": 33124352
476
  }
477
  ],
478
- "md5sum": "70fbdf29d3964389bcae6e3d22e00b4c"
479
  }
480
  ]
481
  }
 
1
  {
2
  "metadata": {
3
  "ParamSize": 31,
4
+ "ParamBytes": 3821161472.0,
5
  "BitsPerParam": 16.0
6
  },
7
  "records": [
8
  {
9
  "dataPath": "params_shard_0.bin",
10
  "format": "raw-shard",
11
+ "nbytes": 978345984,
12
  "records": [
13
  {
14
+ "name": "lm_head.weight",
15
  "shape": [
16
+ 136488,
17
  3584
18
  ],
19
  "dtype": "float16",
20
  "format": "f32-to-bf16",
21
+ "nbytes": 978345984,
22
  "byteOffset": 0
23
  }
24
  ],
25
+ "md5sum": "f24bc2a390a565ec24307333d9aa9b5b"
26
  },
27
  {
28
  "dataPath": "params_shard_1.bin",
29
  "format": "raw-shard",
30
+ "nbytes": 135790592,
31
  "records": [
32
  {
33
+ "name": "model.layers.3.mlp.down_proj.weight",
34
  "shape": [
35
  3584,
36
+ 18944
37
  ],
38
  "dtype": "float16",
39
  "format": "f32-to-bf16",
40
+ "nbytes": 135790592,
41
  "byteOffset": 0
42
  }
43
  ],
44
+ "md5sum": "8463225b5d39d8aa6befc7e394ff9a80"
45
  },
46
  {
47
  "dataPath": "params_shard_2.bin",
 
49
  "nbytes": 271581184,
50
  "records": [
51
  {
52
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
53
  "shape": [
54
  37888,
55
  3584
 
60
  "byteOffset": 0
61
  }
62
  ],
63
+ "md5sum": "ce9430ea30508f4c11646d9f6987c7d5"
64
  },
65
  {
66
  "dataPath": "params_shard_3.bin",
67
  "format": "raw-shard",
68
+ "nbytes": 978345984,
69
  "records": [
70
  {
71
+ "name": "model.embed_tokens.weight",
72
  "shape": [
73
+ 136488,
74
+ 3584
75
  ],
76
  "dtype": "float16",
77
  "format": "f32-to-bf16",
78
+ "nbytes": 978345984,
79
  "byteOffset": 0
80
  }
81
  ],
82
+ "md5sum": "2034469190648e591720406f8f5d6931"
83
  },
84
  {
85
  "dataPath": "params_shard_4.bin",
86
  "format": "raw-shard",
87
+ "nbytes": 135790592,
88
  "records": [
89
  {
90
+ "name": "model.layers.0.mlp.down_proj.weight",
91
  "shape": [
92
+ 3584,
93
+ 18944
94
  ],
95
  "dtype": "float16",
96
  "format": "f32-to-bf16",
97
+ "nbytes": 135790592,
98
  "byteOffset": 0
99
  }
100
  ],
101
+ "md5sum": "d8c5636ba8c54a1bf9a9be52028edd24"
102
  },
103
  {
104
  "dataPath": "params_shard_5.bin",
105
  "format": "raw-shard",
106
+ "nbytes": 271581184,
107
  "records": [
108
  {
109
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
110
  "shape": [
111
+ 37888,
112
  3584
113
  ],
114
  "dtype": "float16",
115
  "format": "f32-to-bf16",
116
+ "nbytes": 271581184,
117
  "byteOffset": 0
118
  }
119
  ],
120
+ "md5sum": "eab8149d005afce177602e3a0502e2d5"
121
  },
122
  {
123
  "dataPath": "params_shard_6.bin",
124
  "format": "raw-shard",
125
+ "nbytes": 25690112,
126
  "records": [
127
  {
128
+ "name": "model.layers.0.self_attn.o_proj.weight",
129
  "shape": [
130
+ 3584,
131
  3584
132
  ],
133
  "dtype": "float16",
134
  "format": "f32-to-bf16",
135
+ "nbytes": 25690112,
136
  "byteOffset": 0
137
  }
138
  ],
139
+ "md5sum": "23278f4c2c2bd26e27c57bf98ad4ef8c"
140
  },
141
  {
142
  "dataPath": "params_shard_7.bin",
 
160
  {
161
  "dataPath": "params_shard_8.bin",
162
  "format": "raw-shard",
163
+ "nbytes": 271581184,
164
  "records": [
165
  {
166
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
167
  "shape": [
168
+ 37888,
169
  3584
170
  ],
171
  "dtype": "float16",
172
  "format": "f32-to-bf16",
173
+ "nbytes": 271581184,
174
  "byteOffset": 0
175
  }
176
  ],
177
+ "md5sum": "92a7bea5fcddf575091c43b1c4df714c"
178
  },
179
  {
180
  "dataPath": "params_shard_9.bin",
181
  "format": "raw-shard",
182
+ "nbytes": 33030144,
183
  "records": [
184
  {
185
+ "name": "model.layers.1.self_attn.c_attn.weight",
186
  "shape": [
187
+ 4608,
188
  3584
189
  ],
190
  "dtype": "float16",
191
  "format": "f32-to-bf16",
192
+ "nbytes": 33030144,
193
  "byteOffset": 0
194
  }
195
  ],
196
+ "md5sum": "c03154c2e7e28390bd1554a72a582034"
197
  },
198
  {
199
  "dataPath": "params_shard_10.bin",
200
  "format": "raw-shard",
201
+ "nbytes": 25690112,
202
  "records": [
203
  {
204
+ "name": "model.layers.1.self_attn.o_proj.weight",
205
  "shape": [
206
+ 3584,
207
  3584
208
  ],
209
  "dtype": "float16",
210
  "format": "f32-to-bf16",
211
+ "nbytes": 25690112,
212
  "byteOffset": 0
213
  }
214
  ],
215
+ "md5sum": "d46398b81504bd2d82830108afdd8407"
216
  },
217
  {
218
  "dataPath": "params_shard_11.bin",
 
236
  {
237
  "dataPath": "params_shard_12.bin",
238
  "format": "raw-shard",
239
+ "nbytes": 271581184,
240
  "records": [
241
  {
242
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
243
  "shape": [
244
+ 37888,
245
  3584
246
  ],
247
  "dtype": "float16",
248
  "format": "f32-to-bf16",
249
+ "nbytes": 271581184,
250
  "byteOffset": 0
251
  }
252
  ],
253
+ "md5sum": "56b5611e7e548cbe8da292871a122b53"
254
  },
255
  {
256
  "dataPath": "params_shard_13.bin",
257
  "format": "raw-shard",
258
+ "nbytes": 33030144,
259
  "records": [
260
  {
261
+ "name": "model.layers.2.self_attn.c_attn.weight",
262
  "shape": [
263
+ 4608,
264
  3584
265
  ],
266
  "dtype": "float16",
267
  "format": "f32-to-bf16",
268
+ "nbytes": 33030144,
269
  "byteOffset": 0
270
  }
271
  ],
272
+ "md5sum": "8af6d905b41911171901c0c9b1309afb"
273
  },
274
  {
275
  "dataPath": "params_shard_14.bin",
276
  "format": "raw-shard",
277
+ "nbytes": 25690112,
278
  "records": [
279
  {
280
+ "name": "model.layers.2.self_attn.o_proj.weight",
281
  "shape": [
282
+ 3584,
283
  3584
284
  ],
285
  "dtype": "float16",
286
  "format": "f32-to-bf16",
287
+ "nbytes": 25690112,
288
  "byteOffset": 0
289
  }
290
  ],
291
+ "md5sum": "126e17f7b67d299092e34621d3d01ba5"
292
  },
293
  {
294
  "dataPath": "params_shard_15.bin",
295
  "format": "raw-shard",
296
+ "nbytes": 33030144,
297
  "records": [
298
  {
299
+ "name": "model.layers.3.self_attn.c_attn.weight",
300
  "shape": [
301
+ 4608,
302
+ 3584
303
  ],
304
  "dtype": "float16",
305
  "format": "f32-to-bf16",
306
+ "nbytes": 33030144,
307
  "byteOffset": 0
308
  }
309
  ],
310
+ "md5sum": "10c68f4a89d8c9e5ac3fd00b5ce2ee47"
311
  },
312
  {
313
  "dataPath": "params_shard_16.bin",
314
  "format": "raw-shard",
315
+ "nbytes": 25690112,
316
  "records": [
317
  {
318
+ "name": "model.layers.3.self_attn.o_proj.weight",
319
  "shape": [
320
+ 3584,
321
  3584
322
  ],
323
  "dtype": "float16",
324
  "format": "f32-to-bf16",
325
+ "nbytes": 25690112,
326
  "byteOffset": 0
327
  }
328
  ],
329
+ "md5sum": "e655468aacdcefbc5172993aea2634e2"
330
  },
331
  {
332
  "dataPath": "params_shard_17.bin",
 
334
  "nbytes": 33131520,
335
  "records": [
336
  {
337
+ "name": "model.layers.3.input_layernorm.weight",
338
  "shape": [
 
339
  3584
340
  ],
341
  "dtype": "float16",
342
  "format": "f32-to-bf16",
343
+ "nbytes": 7168,
344
  "byteOffset": 0
345
  },
346
  {
347
+ "name": "model.layers.3.post_attention_layernorm.weight",
348
  "shape": [
349
+ 3584
350
  ],
351
  "dtype": "float16",
352
  "format": "f32-to-bf16",
353
+ "nbytes": 7168,
354
+ "byteOffset": 7168
355
+ },
356
+ {
357
+ "name": "model.norm.weight",
358
+ "shape": [
359
+ 3584
360
+ ],
361
+ "dtype": "float16",
362
+ "format": "f32-to-bf16",
363
+ "nbytes": 7168,
364
+ "byteOffset": 14336
365
  },
366
  {
367
  "name": "model.layers.0.input_layernorm.weight",
 
371
  "dtype": "float16",
372
  "format": "f32-to-bf16",
373
  "nbytes": 7168,
374
+ "byteOffset": 21504
375
  },
376
  {
377
  "name": "model.layers.0.post_attention_layernorm.weight",
 
381
  "dtype": "float16",
382
  "format": "f32-to-bf16",
383
  "nbytes": 7168,
384
+ "byteOffset": 28672
385
  },
386
  {
387
+ "name": "model.layers.0.self_attn.c_attn.bias",
388
  "shape": [
389
  4608
390
  ],
391
  "dtype": "float16",
392
  "format": "f32-to-bf16",
393
  "nbytes": 9216,
394
+ "byteOffset": 35840
395
+ },
396
+ {
397
+ "name": "model.layers.0.self_attn.c_attn.weight",
398
+ "shape": [
399
+ 4608,
400
+ 3584
401
+ ],
402
+ "dtype": "float16",
403
+ "format": "f32-to-bf16",
404
+ "nbytes": 33030144,
405
+ "byteOffset": 45056
406
  },
407
  {
408
  "name": "model.layers.1.input_layernorm.weight",
 
412
  "dtype": "float16",
413
  "format": "f32-to-bf16",
414
  "nbytes": 7168,
415
+ "byteOffset": 33075200
416
  },
417
  {
418
  "name": "model.layers.1.post_attention_layernorm.weight",
 
422
  "dtype": "float16",
423
  "format": "f32-to-bf16",
424
  "nbytes": 7168,
425
+ "byteOffset": 33082368
426
  },
427
  {
428
+ "name": "model.layers.1.self_attn.c_attn.bias",
429
  "shape": [
430
  4608
431
  ],
432
  "dtype": "float16",
433
  "format": "f32-to-bf16",
434
  "nbytes": 9216,
435
+ "byteOffset": 33089536
436
  },
437
  {
438
  "name": "model.layers.2.input_layernorm.weight",
 
442
  "dtype": "float16",
443
  "format": "f32-to-bf16",
444
  "nbytes": 7168,
445
+ "byteOffset": 33098752
446
  },
447
  {
448
  "name": "model.layers.2.post_attention_layernorm.weight",
 
452
  "dtype": "float16",
453
  "format": "f32-to-bf16",
454
  "nbytes": 7168,
455
+ "byteOffset": 33105920
456
  },
457
  {
458
+ "name": "model.layers.2.self_attn.c_attn.bias",
459
  "shape": [
460
  4608
461
  ],
462
  "dtype": "float16",
463
  "format": "f32-to-bf16",
464
  "nbytes": 9216,
465
+ "byteOffset": 33113088
466
  },
467
  {
468
+ "name": "model.layers.3.self_attn.c_attn.bias",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
  "shape": [
470
+ 4608
471
  ],
472
  "dtype": "float16",
473
  "format": "f32-to-bf16",
474
+ "nbytes": 9216,
475
+ "byteOffset": 33122304
476
  }
477
  ],
478
+ "md5sum": "8024ca13796190a329892865444cd3d9"
479
  }
480
  ]
481
  }
params_shard_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb41eaf0940118f3c87f636fa0c6f644241134077ce70ced4c4a9cf4e5ed3b4
3
- size 1089994752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027bda185ff72d37320fa16426ab0f7c7128aab2ac9eb72d8657340e48d90e4d
3
+ size 978345984
params_shard_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63265fae190fd35ce4b7952fd15222dc24e73e41176bc971d653fbe0fb182f28
3
- size 25690112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b4abaaa000ea810b43c6a40e7215ef23bf9774e4d533534fdef78066e023b9
3
+ size 135790592
params_shard_10.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63ac62dd8140674016640232e972f3380a5282f1044d9766d064e8fc28c314b0
3
- size 271581184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0db85a6033bbbb1b6e4d82705b9c21312baf17aeeb65ecf05f39efc956208eb
3
+ size 25690112
params_shard_12.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b27adf93985b8861804b61c9ca36d86afd88dbdd1a16256a20bf0539dae8084
3
- size 33030144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ac62dd8140674016640232e972f3380a5282f1044d9766d064e8fc28c314b0
3
+ size 271581184
params_shard_13.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa46bfc1cd163cbaaa7fcc46c8599c529c757c59a3235a026692034ec84a65c8
3
- size 25690112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e10b68357f9f8d8ea44347b78c4ddd51a120f2402b8f157d732087e37d2e993
3
+ size 33030144
params_shard_14.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c08916833966babd12d02deefb4f0745b0f6e095a6fc08fb142e3228799cdaa
3
- size 271581184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47cfd3df2045c382bd859dc5255ebf516b4bf8c8dc4c5453385b86e36102d9f5
3
+ size 25690112
params_shard_15.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89b4abaaa000ea810b43c6a40e7215ef23bf9774e4d533534fdef78066e023b9
3
- size 135790592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b27adf93985b8861804b61c9ca36d86afd88dbdd1a16256a20bf0539dae8084
3
+ size 33030144
params_shard_16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a25d1741800bee6c035ed1e9c9e6c08954b2d00518e9954b62f70a07df6254c
3
- size 1089994752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa46bfc1cd163cbaaa7fcc46c8599c529c757c59a3235a026692034ec84a65c8
3
+ size 25690112
params_shard_17.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f0190cb193bbb1ca4d6bfe83caee79e29d4e8bcf1fbdd283c44798f73e81dbf
3
  size 33131520
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21c896dba637025e09d1bc08af03e6399a8c2c1e5c385aa70dd90232cfbc9c7
3
  size 33131520
params_shard_2.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a184d95f1ec81da8b912cc4f609c0c6dda201ee367c813d9f247aadf97d3e500
3
  size 271581184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c08916833966babd12d02deefb4f0745b0f6e095a6fc08fb142e3228799cdaa
3
  size 271581184
params_shard_3.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e351746ffadbc023756d7247b2a8b19eb7a77bccf1a20447a8e90e8b20f2a04c
3
- size 135790592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe753df73072b4952a933e0600c8e4b8cf9f2be71a6025d53ef87f54b967d64
3
+ size 978345984
params_shard_4.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f66f118bf0e88c2995e92e965c6cf745181d48c54429ab8c19cfea311eedea2c
3
- size 33030144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e351746ffadbc023756d7247b2a8b19eb7a77bccf1a20447a8e90e8b20f2a04c
3
+ size 135790592
params_shard_5.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0db85a6033bbbb1b6e4d82705b9c21312baf17aeeb65ecf05f39efc956208eb
3
- size 25690112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a184d95f1ec81da8b912cc4f609c0c6dda201ee367c813d9f247aadf97d3e500
3
+ size 271581184
params_shard_6.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48f19809f0965814f1caea799976e36c80c8b0350abccc96833fe1de430322a2
3
- size 271581184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63265fae190fd35ce4b7952fd15222dc24e73e41176bc971d653fbe0fb182f28
3
+ size 25690112
params_shard_8.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e10b68357f9f8d8ea44347b78c4ddd51a120f2402b8f157d732087e37d2e993
3
- size 33030144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f19809f0965814f1caea799976e36c80c8b0350abccc96833fe1de430322a2
3
+ size 271581184
params_shard_9.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47cfd3df2045c382bd859dc5255ebf516b4bf8c8dc4c5453385b86e36102d9f5
3
- size 25690112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66f118bf0e88c2995e92e965c6cf745181d48c54429ab8c19cfea311eedea2c
3
+ size 33030144