zz814 committed on
Commit
b350f2e
1 Parent(s): 580744e

Upload 23 files

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q4f16_1",
4
+ "model_config": {
5
+ "hidden_size": 4096,
6
+ "intermediate_size": 11008,
7
+ "num_attention_heads": 32,
8
+ "num_hidden_layers": 4,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 55296,
11
+ "position_embedding_base": 10000.0,
12
+ "context_window_size": 4096,
13
+ "prefill_chunk_size": 2048,
14
+ "num_key_value_heads": 32,
15
+ "head_dim": 128,
16
+ "tensor_parallel_shards": 1,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 55296,
20
+ "context_window_size": 4096,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 2048,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "mean_gen_len": 128,
26
+ "max_gen_len": 512,
27
+ "shift_fill_factor": 0.3,
28
+ "temperature": 0.2,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.0,
32
+ "top_p": 0.9,
33
+ "conv_template": {
34
+ "name": "llama-2",
35
+ "system_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n",
36
+ "system_message": "You are a helpful, respectful and honest assistant.",
37
+ "system_prefix_token_ids": [
38
+ 1
39
+ ],
40
+ "add_role_after_system_message": false,
41
+ "roles": {
42
+ "user": "<s>[INST]",
43
+ "assistant": "[/INST]",
44
+ "tool": "[INST]"
45
+ },
46
+ "role_templates": {
47
+ "user": "{user_message}",
48
+ "assistant": "{assistant_message}",
49
+ "tool": "{tool_message}"
50
+ },
51
+ "messages": [],
52
+ "seps": [
53
+ " ",
54
+ " </s>"
55
+ ],
56
+ "role_content_sep": " ",
57
+ "role_empty_sep": " ",
58
+ "stop_str": [
59
+ "[INST]"
60
+ ],
61
+ "stop_token_ids": [
62
+ 2
63
+ ],
64
+ "function_string": "",
65
+ "use_function_calling": false
66
+ },
67
+ "pad_token_id": 0,
68
+ "bos_token_id": 1,
69
+ "eos_token_id": 2,
70
+ "tokenizer_files": [
71
+ "tokenizer.model",
72
+ "tokenizer.json",
73
+ "tokenizer_config.json"
74
+ ],
75
+ "token_table_postproc_method": "byte_fallback",
76
+ "version": "0.1.0"
77
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,639 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 45,
4
+ "ParamBytes": 710221824.0,
5
+ "BitsPerParam": 4.500335784941911
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 113246208,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 55296,
17
+ 512
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 113246208,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "8a6e3d99368ecc20bfedb66a4753a489"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 25165824,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
34
+ "shape": [
35
+ 12288,
36
+ 512
37
+ ],
38
+ "dtype": "uint32",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 25165824,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "6dc6b5a86964a1b8eaf979f2bdb65c5f"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 45088768,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
53
+ "shape": [
54
+ 22016,
55
+ 512
56
+ ],
57
+ "dtype": "uint32",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 45088768,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "f23c578d5279105a2475431b364d6082"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 22544384,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.mlp.down_proj.q_weight",
72
+ "shape": [
73
+ 4096,
74
+ 1376
75
+ ],
76
+ "dtype": "uint32",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 22544384,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "de3af25993f69eee3d38d24398f34933"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 32374784,
88
+ "records": [
89
+ {
90
+ "name": "model.embed_tokens.q_scale",
91
+ "shape": [
92
+ 55296,
93
+ 128
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 14155776,
98
+ "byteOffset": 0
99
+ },
100
+ {
101
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
102
+ "shape": [
103
+ 12288,
104
+ 128
105
+ ],
106
+ "dtype": "float16",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 3145728,
109
+ "byteOffset": 14155776
110
+ },
111
+ {
112
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
113
+ "shape": [
114
+ 4096,
115
+ 512
116
+ ],
117
+ "dtype": "uint32",
118
+ "format": "f32-to-bf16",
119
+ "nbytes": 8388608,
120
+ "byteOffset": 17301504
121
+ },
122
+ {
123
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
124
+ "shape": [
125
+ 4096,
126
+ 128
127
+ ],
128
+ "dtype": "float16",
129
+ "format": "f32-to-bf16",
130
+ "nbytes": 1048576,
131
+ "byteOffset": 25690112
132
+ },
133
+ {
134
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
135
+ "shape": [
136
+ 22016,
137
+ 128
138
+ ],
139
+ "dtype": "float16",
140
+ "format": "f32-to-bf16",
141
+ "nbytes": 5636096,
142
+ "byteOffset": 26738688
143
+ }
144
+ ],
145
+ "md5sum": "4bbe7d5de8b78ecaca0e56086a595d2b"
146
+ },
147
+ {
148
+ "dataPath": "params_shard_5.bin",
149
+ "format": "raw-shard",
150
+ "nbytes": 31145984,
151
+ "records": [
152
+ {
153
+ "name": "model.layers.0.mlp.down_proj.q_scale",
154
+ "shape": [
155
+ 4096,
156
+ 344
157
+ ],
158
+ "dtype": "float16",
159
+ "format": "f32-to-bf16",
160
+ "nbytes": 2818048,
161
+ "byteOffset": 0
162
+ },
163
+ {
164
+ "name": "model.layers.0.input_layernorm.weight",
165
+ "shape": [
166
+ 4096
167
+ ],
168
+ "dtype": "float16",
169
+ "format": "f32-to-bf16",
170
+ "nbytes": 8192,
171
+ "byteOffset": 2818048
172
+ },
173
+ {
174
+ "name": "model.layers.0.post_attention_layernorm.weight",
175
+ "shape": [
176
+ 4096
177
+ ],
178
+ "dtype": "float16",
179
+ "format": "f32-to-bf16",
180
+ "nbytes": 8192,
181
+ "byteOffset": 2826240
182
+ },
183
+ {
184
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
185
+ "shape": [
186
+ 12288,
187
+ 512
188
+ ],
189
+ "dtype": "uint32",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 25165824,
192
+ "byteOffset": 2834432
193
+ },
194
+ {
195
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
196
+ "shape": [
197
+ 12288,
198
+ 128
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 3145728,
203
+ "byteOffset": 28000256
204
+ }
205
+ ],
206
+ "md5sum": "f188574cefeead2adf6866163c151917"
207
+ },
208
+ {
209
+ "dataPath": "params_shard_6.bin",
210
+ "format": "raw-shard",
211
+ "nbytes": 45088768,
212
+ "records": [
213
+ {
214
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
215
+ "shape": [
216
+ 22016,
217
+ 512
218
+ ],
219
+ "dtype": "uint32",
220
+ "format": "f32-to-bf16",
221
+ "nbytes": 45088768,
222
+ "byteOffset": 0
223
+ }
224
+ ],
225
+ "md5sum": "f419bbcd8f931798727fd5963f7b76a9"
226
+ },
227
+ {
228
+ "dataPath": "params_shard_7.bin",
229
+ "format": "raw-shard",
230
+ "nbytes": 22544384,
231
+ "records": [
232
+ {
233
+ "name": "model.layers.1.mlp.down_proj.q_weight",
234
+ "shape": [
235
+ 4096,
236
+ 1376
237
+ ],
238
+ "dtype": "uint32",
239
+ "format": "f32-to-bf16",
240
+ "nbytes": 22544384,
241
+ "byteOffset": 0
242
+ }
243
+ ],
244
+ "md5sum": "1ed071ab5bc7d7ed093e56bdcfc0dfc1"
245
+ },
246
+ {
247
+ "dataPath": "params_shard_8.bin",
248
+ "format": "raw-shard",
249
+ "nbytes": 25165824,
250
+ "records": [
251
+ {
252
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
253
+ "shape": [
254
+ 12288,
255
+ 512
256
+ ],
257
+ "dtype": "uint32",
258
+ "format": "f32-to-bf16",
259
+ "nbytes": 25165824,
260
+ "byteOffset": 0
261
+ }
262
+ ],
263
+ "md5sum": "64517f2b3f4c35296dd9711cb6dd8748"
264
+ },
265
+ {
266
+ "dataPath": "params_shard_9.bin",
267
+ "format": "raw-shard",
268
+ "nbytes": 45088768,
269
+ "records": [
270
+ {
271
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
272
+ "shape": [
273
+ 22016,
274
+ 512
275
+ ],
276
+ "dtype": "uint32",
277
+ "format": "f32-to-bf16",
278
+ "nbytes": 45088768,
279
+ "byteOffset": 0
280
+ }
281
+ ],
282
+ "md5sum": "f3449ee6e8be73f2f04a55eb1354eed7"
283
+ },
284
+ {
285
+ "dataPath": "params_shard_10.bin",
286
+ "format": "raw-shard",
287
+ "nbytes": 30490624,
288
+ "records": [
289
+ {
290
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
291
+ "shape": [
292
+ 4096,
293
+ 512
294
+ ],
295
+ "dtype": "uint32",
296
+ "format": "f32-to-bf16",
297
+ "nbytes": 8388608,
298
+ "byteOffset": 0
299
+ },
300
+ {
301
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
302
+ "shape": [
303
+ 4096,
304
+ 128
305
+ ],
306
+ "dtype": "float16",
307
+ "format": "f32-to-bf16",
308
+ "nbytes": 1048576,
309
+ "byteOffset": 8388608
310
+ },
311
+ {
312
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
313
+ "shape": [
314
+ 22016,
315
+ 128
316
+ ],
317
+ "dtype": "float16",
318
+ "format": "f32-to-bf16",
319
+ "nbytes": 5636096,
320
+ "byteOffset": 9437184
321
+ },
322
+ {
323
+ "name": "model.layers.1.mlp.down_proj.q_scale",
324
+ "shape": [
325
+ 4096,
326
+ 344
327
+ ],
328
+ "dtype": "float16",
329
+ "format": "f32-to-bf16",
330
+ "nbytes": 2818048,
331
+ "byteOffset": 15073280
332
+ },
333
+ {
334
+ "name": "model.layers.1.input_layernorm.weight",
335
+ "shape": [
336
+ 4096
337
+ ],
338
+ "dtype": "float16",
339
+ "format": "f32-to-bf16",
340
+ "nbytes": 8192,
341
+ "byteOffset": 17891328
342
+ },
343
+ {
344
+ "name": "model.layers.1.post_attention_layernorm.weight",
345
+ "shape": [
346
+ 4096
347
+ ],
348
+ "dtype": "float16",
349
+ "format": "f32-to-bf16",
350
+ "nbytes": 8192,
351
+ "byteOffset": 17899520
352
+ },
353
+ {
354
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
355
+ "shape": [
356
+ 12288,
357
+ 128
358
+ ],
359
+ "dtype": "float16",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 3145728,
362
+ "byteOffset": 17907712
363
+ },
364
+ {
365
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
366
+ "shape": [
367
+ 4096,
368
+ 512
369
+ ],
370
+ "dtype": "uint32",
371
+ "format": "f32-to-bf16",
372
+ "nbytes": 8388608,
373
+ "byteOffset": 21053440
374
+ },
375
+ {
376
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
377
+ "shape": [
378
+ 4096,
379
+ 128
380
+ ],
381
+ "dtype": "float16",
382
+ "format": "f32-to-bf16",
383
+ "nbytes": 1048576,
384
+ "byteOffset": 29442048
385
+ }
386
+ ],
387
+ "md5sum": "5b17842481ec0700776b00984d62fdd5"
388
+ },
389
+ {
390
+ "dataPath": "params_shard_11.bin",
391
+ "format": "raw-shard",
392
+ "nbytes": 25165824,
393
+ "records": [
394
+ {
395
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
396
+ "shape": [
397
+ 12288,
398
+ 512
399
+ ],
400
+ "dtype": "uint32",
401
+ "format": "f32-to-bf16",
402
+ "nbytes": 25165824,
403
+ "byteOffset": 0
404
+ }
405
+ ],
406
+ "md5sum": "81dda134f78fe8bc32eb9333ffb489dd"
407
+ },
408
+ {
409
+ "dataPath": "params_shard_12.bin",
410
+ "format": "raw-shard",
411
+ "nbytes": 31014912,
412
+ "records": [
413
+ {
414
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
415
+ "shape": [
416
+ 22016,
417
+ 128
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 5636096,
422
+ "byteOffset": 0
423
+ },
424
+ {
425
+ "name": "model.layers.2.mlp.down_proj.q_weight",
426
+ "shape": [
427
+ 4096,
428
+ 1376
429
+ ],
430
+ "dtype": "uint32",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 22544384,
433
+ "byteOffset": 5636096
434
+ },
435
+ {
436
+ "name": "model.layers.2.mlp.down_proj.q_scale",
437
+ "shape": [
438
+ 4096,
439
+ 344
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 2818048,
444
+ "byteOffset": 28180480
445
+ },
446
+ {
447
+ "name": "model.layers.2.input_layernorm.weight",
448
+ "shape": [
449
+ 4096
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 8192,
454
+ "byteOffset": 30998528
455
+ },
456
+ {
457
+ "name": "model.layers.2.post_attention_layernorm.weight",
458
+ "shape": [
459
+ 4096
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 8192,
464
+ "byteOffset": 31006720
465
+ }
466
+ ],
467
+ "md5sum": "59366ee4fb0104a9d93e7a45c7a1a27a"
468
+ },
469
+ {
470
+ "dataPath": "params_shard_13.bin",
471
+ "format": "raw-shard",
472
+ "nbytes": 45088768,
473
+ "records": [
474
+ {
475
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
476
+ "shape": [
477
+ 22016,
478
+ 512
479
+ ],
480
+ "dtype": "uint32",
481
+ "format": "f32-to-bf16",
482
+ "nbytes": 45088768,
483
+ "byteOffset": 0
484
+ }
485
+ ],
486
+ "md5sum": "ccb83f6c6dac4773793fdc5c474cc8c2"
487
+ },
488
+ {
489
+ "dataPath": "params_shard_14.bin",
490
+ "format": "raw-shard",
491
+ "nbytes": 22544384,
492
+ "records": [
493
+ {
494
+ "name": "model.layers.3.mlp.down_proj.q_weight",
495
+ "shape": [
496
+ 4096,
497
+ 1376
498
+ ],
499
+ "dtype": "uint32",
500
+ "format": "f32-to-bf16",
501
+ "nbytes": 22544384,
502
+ "byteOffset": 0
503
+ }
504
+ ],
505
+ "md5sum": "88a2bbad50d90a1e27ba7d1490d75aee"
506
+ },
507
+ {
508
+ "dataPath": "params_shard_15.bin",
509
+ "format": "raw-shard",
510
+ "nbytes": 113246208,
511
+ "records": [
512
+ {
513
+ "name": "lm_head.q_weight",
514
+ "shape": [
515
+ 55296,
516
+ 512
517
+ ],
518
+ "dtype": "uint32",
519
+ "format": "f32-to-bf16",
520
+ "nbytes": 113246208,
521
+ "byteOffset": 0
522
+ }
523
+ ],
524
+ "md5sum": "576ee2120b5bcefc80df4416726393ce"
525
+ },
526
+ {
527
+ "dataPath": "params_shard_16.bin",
528
+ "format": "raw-shard",
529
+ "nbytes": 21061632,
530
+ "records": [
531
+ {
532
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
533
+ "shape": [
534
+ 12288,
535
+ 128
536
+ ],
537
+ "dtype": "float16",
538
+ "format": "f32-to-bf16",
539
+ "nbytes": 3145728,
540
+ "byteOffset": 0
541
+ },
542
+ {
543
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
544
+ "shape": [
545
+ 4096,
546
+ 512
547
+ ],
548
+ "dtype": "uint32",
549
+ "format": "f32-to-bf16",
550
+ "nbytes": 8388608,
551
+ "byteOffset": 3145728
552
+ },
553
+ {
554
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
555
+ "shape": [
556
+ 4096,
557
+ 128
558
+ ],
559
+ "dtype": "float16",
560
+ "format": "f32-to-bf16",
561
+ "nbytes": 1048576,
562
+ "byteOffset": 11534336
563
+ },
564
+ {
565
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
566
+ "shape": [
567
+ 22016,
568
+ 128
569
+ ],
570
+ "dtype": "float16",
571
+ "format": "f32-to-bf16",
572
+ "nbytes": 5636096,
573
+ "byteOffset": 12582912
574
+ },
575
+ {
576
+ "name": "model.layers.3.mlp.down_proj.q_scale",
577
+ "shape": [
578
+ 4096,
579
+ 344
580
+ ],
581
+ "dtype": "float16",
582
+ "format": "f32-to-bf16",
583
+ "nbytes": 2818048,
584
+ "byteOffset": 18219008
585
+ },
586
+ {
587
+ "name": "model.layers.3.input_layernorm.weight",
588
+ "shape": [
589
+ 4096
590
+ ],
591
+ "dtype": "float16",
592
+ "format": "f32-to-bf16",
593
+ "nbytes": 8192,
594
+ "byteOffset": 21037056
595
+ },
596
+ {
597
+ "name": "model.layers.3.post_attention_layernorm.weight",
598
+ "shape": [
599
+ 4096
600
+ ],
601
+ "dtype": "float16",
602
+ "format": "f32-to-bf16",
603
+ "nbytes": 8192,
604
+ "byteOffset": 21045248
605
+ },
606
+ {
607
+ "name": "model.norm.weight",
608
+ "shape": [
609
+ 4096
610
+ ],
611
+ "dtype": "float16",
612
+ "format": "f32-to-bf16",
613
+ "nbytes": 8192,
614
+ "byteOffset": 21053440
615
+ }
616
+ ],
617
+ "md5sum": "51bd241eece6fa67f1a4bd0dde4d5fff"
618
+ },
619
+ {
620
+ "dataPath": "params_shard_17.bin",
621
+ "format": "raw-shard",
622
+ "nbytes": 14155776,
623
+ "records": [
624
+ {
625
+ "name": "lm_head.q_scale",
626
+ "shape": [
627
+ 55296,
628
+ 128
629
+ ],
630
+ "dtype": "float16",
631
+ "format": "f32-to-bf16",
632
+ "nbytes": 14155776,
633
+ "byteOffset": 0
634
+ }
635
+ ],
636
+ "md5sum": "dc48e203ccd2de292325004b14806dd3"
637
+ }
638
+ ]
639
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a8cc38fcc2589fd889e8ecdd10654c9be0510fbdb83e1c4a233597ffcba4bc
3
+ size 113246208
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b4edc2221b07ebd721d2b27381bcaa7ed8059dd3ac85b5513af97e9ff5595ba
3
+ size 25165824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0b95ae347c76c1454a393253c09d30ce1202efd3b2c29c8ca130f2b113aef2
3
+ size 30490624
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd6374d715b5a92a93b1571e6edddf117a0eb24e0eb63ec79932de042800db23
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d455875abb27ff44ed4baef4291be300c6db3f47b5d5d5133f41691b876b7f37
3
+ size 31014912
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0d5148c6e7d862a123bd400b56b7dc292ec5a8ad3c665ae3a4198027792026
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d60bb747bee9b5fd0becbf139037d8113f2a2d251b64188f948197121b0d259
3
+ size 22544384
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13683d7ea0ecd4e3f3ff72fd219d828e3096b4a57c6b1781ddd03d50b43dd0d7
3
+ size 113246208
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec55c25703b4aec39831fe996222f3fc36b1f9eebc187e4e33c190768c2a2886
3
+ size 21061632
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a52192ac0f31f329a761aa7977d0efd8b5587e098a0b51eadcb0ad9c0678dd7f
3
+ size 14155776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c68eda0b375a7ca086bd9d3f9d3bd0a708e1f47b31ea684a24f1cb9061f4e8c
3
+ size 45088768
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b408a53f6744f067dde7ad1a9387c8bccbda2ee1b91775bac77fc7bd7c7846
3
+ size 22544384
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f28956782a37313e2fd0cfe5b61716ba1f38368fed767cdf454dee838152f87f
3
+ size 32374784
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857752056bd3b48cd88685fa210c284f07ea124b1d561713eaf8b701a6422656
3
+ size 31145984
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964e937791d49589b084ef869e9377150ab53cdfea8130c7dd374c02af843058
3
+ size 45088768
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e746632367a8469572b3dc9d21834d76a9ee55e422e027a09ebddf1ca4d48a66
3
+ size 22544384
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2731b2d5ba1b3d737d21f3bb6f36eeb575cd9e0a190f4381bd06baaa7216ed4a
3
+ size 25165824
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa239d3082116dd776c68f050bb217aac21095da71915522283f0f6b50898400
3
+ size 45088768
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b8844863b200dfcca971db228e96ce388290dfcf72c15d7a9d2f604bac787c
3
+ size 844403
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 1000000000000000019884624838656,
23
+ "pad_token": null,
24
+ "sp_model_kwargs": {},
25
+ "tokenizer_class": "LlamaTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "use_fast": false
35
+ }