zz814 committed
Commit
9d39a56
1 Parent(s): 7b71a79

Upload 24 files

mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
+ {
+ "model_type": "llama",
+ "quantization": "q4f32_1",
+ "model_config": {
+ "hidden_size": 4096,
+ "intermediate_size": 11008,
+ "num_attention_heads": 32,
+ "num_hidden_layers": 4,
+ "rms_norm_eps": 1e-05,
+ "vocab_size": 55296,
+ "position_embedding_base": 10000.0,
+ "context_window_size": 4096,
+ "prefill_chunk_size": 2048,
+ "num_key_value_heads": 32,
+ "head_dim": 128,
+ "tensor_parallel_shards": 1,
+ "max_batch_size": 80
+ },
+ "vocab_size": 55296,
+ "context_window_size": 4096,
+ "sliding_window_size": -1,
+ "prefill_chunk_size": 2048,
+ "attention_sink_size": -1,
+ "tensor_parallel_shards": 1,
+ "mean_gen_len": 128,
+ "max_gen_len": 512,
+ "shift_fill_factor": 0.3,
+ "temperature": 0.2,
+ "presence_penalty": 0.0,
+ "frequency_penalty": 0.0,
+ "repetition_penalty": 1.0,
+ "top_p": 0.9,
+ "conv_template": {
+ "name": "llama-2",
+ "system_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n",
+ "system_message": "You are a helpful, respectful and honest assistant.",
+ "system_prefix_token_ids": [
+ 1
+ ],
+ "add_role_after_system_message": false,
+ "roles": {
+ "user": "<s>[INST]",
+ "assistant": "[/INST]",
+ "tool": "[INST]"
+ },
+ "role_templates": {
+ "user": "{user_message}",
+ "assistant": "{assistant_message}",
+ "tool": "{tool_message}"
+ },
+ "messages": [],
+ "seps": [
+ " ",
+ " </s>"
+ ],
+ "role_content_sep": " ",
+ "role_empty_sep": " ",
+ "stop_str": [
+ "[INST]"
+ ],
+ "stop_token_ids": [
+ 2
+ ],
+ "function_string": "",
+ "use_function_calling": false
+ },
+ "pad_token_id": 0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "tokenizer_files": [
+ "tokenizer.model",
+ "tokenizer.json",
+ "tokenizer_config.json"
+ ],
+ "token_table_postproc_method": "byte_fallback",
+ "version": "0.1.0"
+ }
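
Note on the config above: the conv_template block is what drives prompt assembly at inference time. The sketch below is illustrative only (not the MLC chat runtime's actual code); it renders a single-turn llama-2 style prompt from exactly these fields. The file path and the render_prompt helper are assumptions made for the example.

```python
import json

# Path is an assumption; point it at this repo's mlc-chat-config.json.
with open("mlc-chat-config.json", "r", encoding="utf-8") as f:
    conv = json.load(f)["conv_template"]

def render_prompt(user_message: str) -> str:
    """Illustrative single-turn prompt assembly from the conv_template fields."""
    # system_prefix_token_ids ([1], the BOS id) is prepended at the token level, not as text.
    prompt = conv["system_template"].format(system_message=conv["system_message"])
    # add_role_after_system_message is false, so the first user message follows the
    # system block directly instead of repeating the "<s>[INST]" role tag.
    prompt += conv["role_templates"]["user"].format(user_message=user_message)
    prompt += conv["seps"][0]             # " "
    prompt += conv["roles"]["assistant"]  # "[/INST]" opens the assistant turn
    return prompt

print(render_prompt("What is 2 + 2?"))
```

Generation then stops at stop_str ("[INST]") or stop_token_ids ([2], the EOS id).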
ndarray-cache-b16.json ADDED
@@ -0,0 +1,639 @@
+ {
+ "metadata": {
+ "ParamSize": 45,
+ "ParamBytes": 789200896.0,
+ "BitsPerParam": 5.000788364646225
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 113246208,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_weight",
+ "shape": [
+ 55296,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 113246208,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fe870c24c37d50833f35e5356e23fa21"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2732fee74ddceeec2a570c77bcf3a75f"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5583cabee6e1df6bc137f2495e41aacb"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 22544384,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cea6f0488cca4a33509e06aeb87e9658"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 32374784,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_scale",
+ "shape": [
+ 55296,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 14155776,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3145728,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 17301504
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 25690112
+ },
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 5636096,
+ "byteOffset": 26738688
+ }
+ ],
+ "md5sum": "b6cdce6d80073579dc41fd1cea4b3f2b"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 31145984,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 2818048,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 2818048
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 2826240
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 2834432
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3145728,
+ "byteOffset": 28000256
+ }
+ ],
+ "md5sum": "24947fc54d4d479312e736080fe47c06"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f81606af90319436b0c52bc3087a8a75"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 22544384,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2f3b104d6012ee96282f1cb68e9a540a"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "92644b7b40d42f6a8f79a2780b4287e0"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cbc67bc2b9741282cdba7dc02cf94d88"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 30490624,
+ "records": [
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 5636096,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 2818048,
+ "byteOffset": 15073280
+ },
+ {
+ "name": "model.layers.1.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 17891328
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 17899520
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3145728,
+ "byteOffset": 17907712
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21053440
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 29442048
+ }
+ ],
+ "md5sum": "99fa11d45465d2a56e742b32f1f9a1d8"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e012050a37a0fcec4e06efd599d37d51"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 31014912,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 5636096,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 5636096
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 2818048,
+ "byteOffset": 28180480
+ },
+ {
+ "name": "model.layers.2.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 30998528
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 31006720
+ }
+ ],
+ "md5sum": "4e0c7558b16811665b346390a73a8911"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b969fb28468abdae8a625930d384bdec"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 22544384,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "93ffb5c0be2625c19d8f1411fbb2e187"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 113246208,
+ "records": [
+ {
+ "name": "lm_head.q_weight",
+ "shape": [
+ 55296,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 113246208,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ad297ed29c5c64ff691e30d12bfaba09"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 21061632,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3145728,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3145728
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 11534336
+ },
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 5636096,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 2818048,
+ "byteOffset": 18219008
+ },
+ {
+ "name": "model.layers.3.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21037056
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21045248
+ },
+ {
+ "name": "model.norm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21053440
+ }
+ ],
+ "md5sum": "3865398255ece47a36de555fa54b952e"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 14155776,
+ "records": [
+ {
+ "name": "lm_head.q_scale",
+ "shape": [
+ 55296,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 14155776,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2b16bda1d0ada369b8f31ed936e081f6"
+ }
+ ]
+ }
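
Note on the manifest above: the outer records list one entry per params_shard_*.bin with its total nbytes and md5sum, and the inner records place each tensor at a byteOffset within that shard. The shapes are consistent with q4f32_1 packing eight 4-bit weights per uint32 (4096/8 = 512, 11008/8 = 1376) and keeping one scale per group of 32 weights (4096/32 = 128, 11008/32 = 344), though the exact bit layout is defined by the MLC runtime. A minimal integrity-check sketch using only the standard library; it assumes the manifest and shard files sit in the current directory.

```python
import hashlib
import json
from pathlib import Path

# Assumption: ndarray-cache-b16.json and params_shard_*.bin are in the working directory.
manifest = json.loads(Path("ndarray-cache-b16.json").read_text())

for shard in manifest["records"]:
    data = Path(shard["dataPath"]).read_bytes()
    # Shard-level checks against the manifest.
    assert len(data) == shard["nbytes"], f"size mismatch in {shard['dataPath']}"
    assert hashlib.md5(data).hexdigest() == shard["md5sum"], f"md5 mismatch in {shard['dataPath']}"
    # Each tensor occupies the contiguous byte range [byteOffset, byteOffset + nbytes).
    for rec in shard["records"]:
        raw = data[rec["byteOffset"]: rec["byteOffset"] + rec["nbytes"]]
        assert len(raw) == rec["nbytes"]
        print(rec["name"], rec["dtype"], rec["shape"], len(raw), "bytes")
```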
ndarray-cache.json ADDED
@@ -0,0 +1,639 @@
+ {
+ "metadata": {
+ "ParamSize": 45,
+ "ParamBytes": 789200896.0,
+ "BitsPerParam": 5.000788364646225
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 113246208,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_weight",
+ "shape": [
+ 55296,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 113246208,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fe870c24c37d50833f35e5356e23fa21"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2732fee74ddceeec2a570c77bcf3a75f"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5583cabee6e1df6bc137f2495e41aacb"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 22544384,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cea6f0488cca4a33509e06aeb87e9658"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 32374784,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_scale",
+ "shape": [
+ 55296,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 14155776,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3145728,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 17301504
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 25690112
+ },
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 5636096,
+ "byteOffset": 26738688
+ }
+ ],
+ "md5sum": "b6cdce6d80073579dc41fd1cea4b3f2b"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 31145984,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 2818048,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 2818048
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 2826240
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 2834432
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3145728,
+ "byteOffset": 28000256
+ }
+ ],
+ "md5sum": "24947fc54d4d479312e736080fe47c06"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f81606af90319436b0c52bc3087a8a75"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 22544384,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2f3b104d6012ee96282f1cb68e9a540a"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "92644b7b40d42f6a8f79a2780b4287e0"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cbc67bc2b9741282cdba7dc02cf94d88"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 30490624,
+ "records": [
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 5636096,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 2818048,
+ "byteOffset": 15073280
+ },
+ {
+ "name": "model.layers.1.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 17891328
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 17899520
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3145728,
+ "byteOffset": 17907712
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21053440
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 29442048
+ }
+ ],
+ "md5sum": "99fa11d45465d2a56e742b32f1f9a1d8"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 12288,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e012050a37a0fcec4e06efd599d37d51"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 31014912,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 5636096,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 5636096
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 2818048,
+ "byteOffset": 28180480
+ },
+ {
+ "name": "model.layers.2.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 30998528
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 31006720
+ }
+ ],
+ "md5sum": "4e0c7558b16811665b346390a73a8911"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 45088768,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 22016,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 45088768,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b969fb28468abdae8a625930d384bdec"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 22544384,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1376
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 22544384,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "93ffb5c0be2625c19d8f1411fbb2e187"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 113246208,
+ "records": [
+ {
+ "name": "lm_head.q_weight",
+ "shape": [
+ 55296,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 113246208,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ad297ed29c5c64ff691e30d12bfaba09"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 21061632,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 12288,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3145728,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3145728
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 11534336
+ },
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 22016,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 5636096,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 344
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 2818048,
+ "byteOffset": 18219008
+ },
+ {
+ "name": "model.layers.3.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21037056
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21045248
+ },
+ {
+ "name": "model.norm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21053440
+ }
+ ],
+ "md5sum": "3865398255ece47a36de555fa54b952e"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 14155776,
+ "records": [
+ {
+ "name": "lm_head.q_scale",
+ "shape": [
+ 55296,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 14155776,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2b16bda1d0ada369b8f31ed936e081f6"
+ }
+ ]
+ }
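
Note: this manifest describes the same shards, offsets, sizes, and md5sums as ndarray-cache-b16.json; only the declared dtype/format of the scale and norm tensors differs (float32 with format f32-to-bf16 rather than bfloat16 with format raw). In other words, the on-disk payload is bfloat16 derived from float32, which is why nbytes works out to 2 bytes per element. A hedged numpy sketch of widening such a payload back to float32 (bfloat16 is the top 16 bits of an IEEE-754 float32); the placeholder buffer stands in for real shard bytes:

```python
import numpy as np

def bf16_bytes_to_f32(raw: bytes) -> np.ndarray:
    """Widen a bfloat16 byte payload to float32 (sketch; assumes a little-endian host)."""
    u16 = np.frombuffer(raw, dtype=np.uint16)
    # Place each 16-bit value in the high half of a 32-bit word.
    u32 = u16.astype(np.uint32) << 16
    return u32.view(np.float32)

# Example with model.norm.weight: shape [4096], nbytes 8192 -> 2 bytes per element.
raw = bytes(8192)                    # placeholder; the real bytes live in params_shard_16.bin
print(bf16_bytes_to_f32(raw).shape)  # (4096,)
```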
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b924a0640ca78b3b37ea8d83ee06c16f7d4b418f4722f8f87924a6f8ef239d8b
+ size 113246208
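
Note: each params_shard_*.bin (and tokenizer.model) is committed as a Git LFS pointer rather than the binary itself: three lines giving the spec version, the sha256 oid, and the blob size in bytes. A small sketch for parsing a pointer and validating a separately fetched blob against it; the "downloaded/" path is a placeholder, not part of this repo.

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into {'version', 'oid', 'size'}."""
    fields = dict(line.split(" ", 1) for line in Path(path).read_text().splitlines() if line)
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

pointer = parse_lfs_pointer("params_shard_0.bin")           # the pointer checked into git
blob = Path("downloaded/params_shard_0.bin").read_bytes()   # hypothetical resolved download
assert len(blob) == pointer["size"]
assert hashlib.sha256(blob).hexdigest() == pointer["oid"]
```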
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba24116ebde38a3d8de7838dcecde6de3f99bb60f73bb244875d5c7e920dcc15
+ size 25165824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b29b19ef21b2dd4f67d879d3ce83f2c090f97aa075447f8294fa523d4b07f5e0
+ size 30490624
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d59a750c6837d64bd8f1e40d5af9b15eac719a598d20d6f5ae0120f3da342a18
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:493f9b539056c6d40957440a9590c96dec1311a3b4bb5b660f6c4e1409abb453
+ size 31014912
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:59da04bd8b6c0afa0f9b620a3ee43ab825f0ede87523414ba27bd8b307164add
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a76faf9b956d98fa99fdbc79e8eb400b4e8b87c7589f6ccef73d8a2310ba71e
+ size 22544384
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d59f54e18c377bcd2a1d20e2bbece291ec46f568bc7b7bda212cec60b598eef1
+ size 113246208
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb9e5dfa7f88b98e312ac89df5ac83efc7a838ffb092a9e3b92952c354043edf
+ size 21061632
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b352ebb41a0e5cbd2fc287d54e71ef67116fd0755e5019d1d5d5591254c03fdb
+ size 14155776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5dc0a54404a0a6cbdabae1fc185fb77d79d2bf78e2fb702289d3e3579be53d97
+ size 45088768
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a698caf8a68f48e7c8bbc189dffdcd7628d4f3cb0938e8f08db3bf68304ea921
+ size 22544384
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5eca3986a38430361f42baee35d04fd9037324719ba3cd08820fd34f3d02c670
+ size 32374784
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:41cd8f78595c2519d3ae0c24eae75452c25f85aa7ded823a80515df36f8e4543
+ size 31145984
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3de5593138676f78244baabb317692ba9a51478ceea60843d102884ac28f4d77
+ size 45088768
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:299537347aea2d23a2f37d283f4ec169739da06d50deff8ea5898d45f926defa
+ size 22544384
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aebe29c963aa795121a556295af577766cd0b667c0df2a4e0f2ce705a940a842
+ size 25165824
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:289ce81400fe8ef756affe43e44450297e10e5ba3d279511d809b6d7d7eea9e3
+ size 45088768
tokenizer.json ADDED
The diff for this file is too large to render.
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3b8844863b200dfcca971db228e96ce388290dfcf72c15d7a9d2f604bac787c
+ size 844403
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "use_fast": false
+ }
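
Note on the tokenizer config above: it declares the slow SentencePiece LlamaTokenizer, adding BOS but not EOS. Assuming the Hugging Face transformers library is installed, the tokenizer can be loaded straight from this repository's checkout; the "." path below is a placeholder for wherever tokenizer.model and tokenizer_config.json live.

```python
from transformers import AutoTokenizer

# Placeholder path: the directory containing tokenizer.model / tokenizer_config.json.
tok = AutoTokenizer.from_pretrained(".", use_fast=False)

ids = tok("Hello")["input_ids"]
print(ids[0] == tok.bos_token_id)    # True, since add_bos_token is true
print(tok.eos_token, tok.unk_token)  # </s> <unk>
```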