jykim310 committed on
Commit
125ac6c
·
verified ·
1 Parent(s): a3b529f

Upload 29 files

Browse files
cpt_st-vicuna-v1.3-1.5b-ppl-q4f16_1-android.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd5098f808a27559bdf3b9a1d9bfaa7ddd682e690d777b1ad451fc7cd6b3107
3
+ size 342998
mlc-chat-config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q4f16_1",
4
+ "model_config": {
5
+ "hidden_size": 4096,
6
+ "intermediate_size": 11008,
7
+ "num_attention_heads": 32,
8
+ "num_hidden_layers": 6,
9
+ "rms_norm_eps": 1e-06,
10
+ "vocab_size": 32000,
11
+ "position_embedding_base": 10000,
12
+ "context_window_size": 768,
13
+ "prefill_chunk_size": 768,
14
+ "num_key_value_heads": 32,
15
+ "head_dim": 128,
16
+ "tensor_parallel_shards": 1,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 32000,
20
+ "context_window_size": 768,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 768,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "mean_gen_len": 128,
26
+ "max_gen_len": 512,
27
+ "shift_fill_factor": 0.3,
28
+ "temperature": 0,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.0,
32
+ "top_p": 0.95,
33
+ "conv_template": "vicuna_v1.1",
34
+ "pad_token_id": 0,
35
+ "bos_token_id": 1,
36
+ "eos_token_id": 2,
37
+ "tokenizer_files": [
38
+ "tokenizer.model",
39
+ "tokenizer_config.json"
40
+ ],
41
+ "version": "0.1.0"
42
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,903 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 65,
4
+ "ParamBytes": 830578688.0,
5
+ "BitsPerParam": 4.500414746671623
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 65536000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 512
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 65536000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "6074b8d5faea8fb20a1397a0b432521b"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33357824,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_scale",
34
+ "shape": [
35
+ 32000,
36
+ 128
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 8192000,
41
+ "byteOffset": 0
42
+ },
43
+ {
44
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
45
+ "shape": [
46
+ 12288,
47
+ 512
48
+ ],
49
+ "dtype": "uint32",
50
+ "format": "f32-to-bf16",
51
+ "nbytes": 25165824,
52
+ "byteOffset": 8192000
53
+ }
54
+ ],
55
+ "md5sum": "8659d98c418f518bdb8c8b722f6dc22a"
56
+ },
57
+ {
58
+ "dataPath": "params_shard_2.bin",
59
+ "format": "raw-shard",
60
+ "nbytes": 45088768,
61
+ "records": [
62
+ {
63
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
64
+ "shape": [
65
+ 22016,
66
+ 512
67
+ ],
68
+ "dtype": "uint32",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 45088768,
71
+ "byteOffset": 0
72
+ }
73
+ ],
74
+ "md5sum": "4a67c897593512bb619d55ebf0325675"
75
+ },
76
+ {
77
+ "dataPath": "params_shard_3.bin",
78
+ "format": "raw-shard",
79
+ "nbytes": 22544384,
80
+ "records": [
81
+ {
82
+ "name": "model.layers.0.mlp.down_proj.q_weight",
83
+ "shape": [
84
+ 4096,
85
+ 1376
86
+ ],
87
+ "dtype": "uint32",
88
+ "format": "f32-to-bf16",
89
+ "nbytes": 22544384,
90
+ "byteOffset": 0
91
+ }
92
+ ],
93
+ "md5sum": "2e00ba1bedd2ff2e87489c6583492e87"
94
+ },
95
+ {
96
+ "dataPath": "params_shard_4.bin",
97
+ "format": "raw-shard",
98
+ "nbytes": 25165824,
99
+ "records": [
100
+ {
101
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
102
+ "shape": [
103
+ 12288,
104
+ 512
105
+ ],
106
+ "dtype": "uint32",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 25165824,
109
+ "byteOffset": 0
110
+ }
111
+ ],
112
+ "md5sum": "ee7beab1026365e49ede869f77ae62da"
113
+ },
114
+ {
115
+ "dataPath": "params_shard_5.bin",
116
+ "format": "raw-shard",
117
+ "nbytes": 32587776,
118
+ "records": [
119
+ {
120
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
121
+ "shape": [
122
+ 12288,
123
+ 128
124
+ ],
125
+ "dtype": "float16",
126
+ "format": "f32-to-bf16",
127
+ "nbytes": 3145728,
128
+ "byteOffset": 0
129
+ },
130
+ {
131
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
132
+ "shape": [
133
+ 4096,
134
+ 512
135
+ ],
136
+ "dtype": "uint32",
137
+ "format": "f32-to-bf16",
138
+ "nbytes": 8388608,
139
+ "byteOffset": 3145728
140
+ },
141
+ {
142
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
143
+ "shape": [
144
+ 4096,
145
+ 128
146
+ ],
147
+ "dtype": "float16",
148
+ "format": "f32-to-bf16",
149
+ "nbytes": 1048576,
150
+ "byteOffset": 11534336
151
+ },
152
+ {
153
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
154
+ "shape": [
155
+ 22016,
156
+ 128
157
+ ],
158
+ "dtype": "float16",
159
+ "format": "f32-to-bf16",
160
+ "nbytes": 5636096,
161
+ "byteOffset": 12582912
162
+ },
163
+ {
164
+ "name": "model.layers.0.mlp.down_proj.q_scale",
165
+ "shape": [
166
+ 4096,
167
+ 344
168
+ ],
169
+ "dtype": "float16",
170
+ "format": "f32-to-bf16",
171
+ "nbytes": 2818048,
172
+ "byteOffset": 18219008
173
+ },
174
+ {
175
+ "name": "model.layers.0.input_layernorm.weight",
176
+ "shape": [
177
+ 4096
178
+ ],
179
+ "dtype": "float16",
180
+ "format": "f32-to-bf16",
181
+ "nbytes": 8192,
182
+ "byteOffset": 21037056
183
+ },
184
+ {
185
+ "name": "model.layers.0.post_attention_layernorm.weight",
186
+ "shape": [
187
+ 4096
188
+ ],
189
+ "dtype": "float16",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 8192,
192
+ "byteOffset": 21045248
193
+ },
194
+ {
195
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
196
+ "shape": [
197
+ 12288,
198
+ 128
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 3145728,
203
+ "byteOffset": 21053440
204
+ },
205
+ {
206
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
207
+ "shape": [
208
+ 4096,
209
+ 512
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 8388608,
214
+ "byteOffset": 24199168
215
+ }
216
+ ],
217
+ "md5sum": "3022c6ab78d72b512472d9859c8fc586"
218
+ },
219
+ {
220
+ "dataPath": "params_shard_6.bin",
221
+ "format": "raw-shard",
222
+ "nbytes": 45088768,
223
+ "records": [
224
+ {
225
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
226
+ "shape": [
227
+ 22016,
228
+ 512
229
+ ],
230
+ "dtype": "uint32",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 45088768,
233
+ "byteOffset": 0
234
+ }
235
+ ],
236
+ "md5sum": "17147d4a8ad730c664d430f91f823be0"
237
+ },
238
+ {
239
+ "dataPath": "params_shard_7.bin",
240
+ "format": "raw-shard",
241
+ "nbytes": 25165824,
242
+ "records": [
243
+ {
244
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
245
+ "shape": [
246
+ 12288,
247
+ 512
248
+ ],
249
+ "dtype": "uint32",
250
+ "format": "f32-to-bf16",
251
+ "nbytes": 25165824,
252
+ "byteOffset": 0
253
+ }
254
+ ],
255
+ "md5sum": "cb171bd0adfef7bbcc08fa2a9d3845a6"
256
+ },
257
+ {
258
+ "dataPath": "params_shard_8.bin",
259
+ "format": "raw-shard",
260
+ "nbytes": 32063488,
261
+ "records": [
262
+ {
263
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
264
+ "shape": [
265
+ 4096,
266
+ 128
267
+ ],
268
+ "dtype": "float16",
269
+ "format": "f32-to-bf16",
270
+ "nbytes": 1048576,
271
+ "byteOffset": 0
272
+ },
273
+ {
274
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
275
+ "shape": [
276
+ 22016,
277
+ 128
278
+ ],
279
+ "dtype": "float16",
280
+ "format": "f32-to-bf16",
281
+ "nbytes": 5636096,
282
+ "byteOffset": 1048576
283
+ },
284
+ {
285
+ "name": "model.layers.1.mlp.down_proj.q_weight",
286
+ "shape": [
287
+ 4096,
288
+ 1376
289
+ ],
290
+ "dtype": "uint32",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 22544384,
293
+ "byteOffset": 6684672
294
+ },
295
+ {
296
+ "name": "model.layers.1.mlp.down_proj.q_scale",
297
+ "shape": [
298
+ 4096,
299
+ 344
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 2818048,
304
+ "byteOffset": 29229056
305
+ },
306
+ {
307
+ "name": "model.layers.1.input_layernorm.weight",
308
+ "shape": [
309
+ 4096
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 8192,
314
+ "byteOffset": 32047104
315
+ },
316
+ {
317
+ "name": "model.layers.1.post_attention_layernorm.weight",
318
+ "shape": [
319
+ 4096
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 8192,
324
+ "byteOffset": 32055296
325
+ }
326
+ ],
327
+ "md5sum": "2de448c64208cb50733c0cc71d9cfc09"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_9.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 45088768,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
336
+ "shape": [
337
+ 22016,
338
+ 512
339
+ ],
340
+ "dtype": "uint32",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 45088768,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "423bcd3e7b98839c0162318276c6dbb3"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_10.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 22544384,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.2.mlp.down_proj.q_weight",
355
+ "shape": [
356
+ 4096,
357
+ 1376
358
+ ],
359
+ "dtype": "uint32",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 22544384,
362
+ "byteOffset": 0
363
+ }
364
+ ],
365
+ "md5sum": "0a863f18524f26e06132a73a33ad023e"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_11.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 25165824,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
374
+ "shape": [
375
+ 12288,
376
+ 512
377
+ ],
378
+ "dtype": "uint32",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 25165824,
381
+ "byteOffset": 0
382
+ }
383
+ ],
384
+ "md5sum": "e7fce24651c953a4f1f7002ebb9c3174"
385
+ },
386
+ {
387
+ "dataPath": "params_shard_12.bin",
388
+ "format": "raw-shard",
389
+ "nbytes": 32587776,
390
+ "records": [
391
+ {
392
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
393
+ "shape": [
394
+ 12288,
395
+ 128
396
+ ],
397
+ "dtype": "float16",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 3145728,
400
+ "byteOffset": 0
401
+ },
402
+ {
403
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
404
+ "shape": [
405
+ 4096,
406
+ 512
407
+ ],
408
+ "dtype": "uint32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 8388608,
411
+ "byteOffset": 3145728
412
+ },
413
+ {
414
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
415
+ "shape": [
416
+ 4096,
417
+ 128
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 1048576,
422
+ "byteOffset": 11534336
423
+ },
424
+ {
425
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
426
+ "shape": [
427
+ 22016,
428
+ 128
429
+ ],
430
+ "dtype": "float16",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 5636096,
433
+ "byteOffset": 12582912
434
+ },
435
+ {
436
+ "name": "model.layers.2.mlp.down_proj.q_scale",
437
+ "shape": [
438
+ 4096,
439
+ 344
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 2818048,
444
+ "byteOffset": 18219008
445
+ },
446
+ {
447
+ "name": "model.layers.2.input_layernorm.weight",
448
+ "shape": [
449
+ 4096
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 8192,
454
+ "byteOffset": 21037056
455
+ },
456
+ {
457
+ "name": "model.layers.2.post_attention_layernorm.weight",
458
+ "shape": [
459
+ 4096
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 8192,
464
+ "byteOffset": 21045248
465
+ },
466
+ {
467
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
468
+ "shape": [
469
+ 12288,
470
+ 128
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 3145728,
475
+ "byteOffset": 21053440
476
+ },
477
+ {
478
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
479
+ "shape": [
480
+ 4096,
481
+ 512
482
+ ],
483
+ "dtype": "uint32",
484
+ "format": "f32-to-bf16",
485
+ "nbytes": 8388608,
486
+ "byteOffset": 24199168
487
+ }
488
+ ],
489
+ "md5sum": "3dc9af9bab07bfb4d9811d6a8bc3e50b"
490
+ },
491
+ {
492
+ "dataPath": "params_shard_13.bin",
493
+ "format": "raw-shard",
494
+ "nbytes": 45088768,
495
+ "records": [
496
+ {
497
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
498
+ "shape": [
499
+ 22016,
500
+ 512
501
+ ],
502
+ "dtype": "uint32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 45088768,
505
+ "byteOffset": 0
506
+ }
507
+ ],
508
+ "md5sum": "5577b15a8daf2ae1005e7ad4f88e9ded"
509
+ },
510
+ {
511
+ "dataPath": "params_shard_14.bin",
512
+ "format": "raw-shard",
513
+ "nbytes": 25165824,
514
+ "records": [
515
+ {
516
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
517
+ "shape": [
518
+ 12288,
519
+ 512
520
+ ],
521
+ "dtype": "uint32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 25165824,
524
+ "byteOffset": 0
525
+ }
526
+ ],
527
+ "md5sum": "e416ee6940c00d9eb6b4f12444857158"
528
+ },
529
+ {
530
+ "dataPath": "params_shard_15.bin",
531
+ "format": "raw-shard",
532
+ "nbytes": 32063488,
533
+ "records": [
534
+ {
535
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
536
+ "shape": [
537
+ 4096,
538
+ 128
539
+ ],
540
+ "dtype": "float16",
541
+ "format": "f32-to-bf16",
542
+ "nbytes": 1048576,
543
+ "byteOffset": 0
544
+ },
545
+ {
546
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
547
+ "shape": [
548
+ 22016,
549
+ 128
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 5636096,
554
+ "byteOffset": 1048576
555
+ },
556
+ {
557
+ "name": "model.layers.3.mlp.down_proj.q_weight",
558
+ "shape": [
559
+ 4096,
560
+ 1376
561
+ ],
562
+ "dtype": "uint32",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 22544384,
565
+ "byteOffset": 6684672
566
+ },
567
+ {
568
+ "name": "model.layers.3.mlp.down_proj.q_scale",
569
+ "shape": [
570
+ 4096,
571
+ 344
572
+ ],
573
+ "dtype": "float16",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 2818048,
576
+ "byteOffset": 29229056
577
+ },
578
+ {
579
+ "name": "model.layers.3.input_layernorm.weight",
580
+ "shape": [
581
+ 4096
582
+ ],
583
+ "dtype": "float16",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 8192,
586
+ "byteOffset": 32047104
587
+ },
588
+ {
589
+ "name": "model.layers.3.post_attention_layernorm.weight",
590
+ "shape": [
591
+ 4096
592
+ ],
593
+ "dtype": "float16",
594
+ "format": "f32-to-bf16",
595
+ "nbytes": 8192,
596
+ "byteOffset": 32055296
597
+ }
598
+ ],
599
+ "md5sum": "5633c5a4f580f309dde8f1dbb283fe34"
600
+ },
601
+ {
602
+ "dataPath": "params_shard_16.bin",
603
+ "format": "raw-shard",
604
+ "nbytes": 45088768,
605
+ "records": [
606
+ {
607
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
608
+ "shape": [
609
+ 22016,
610
+ 512
611
+ ],
612
+ "dtype": "uint32",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 45088768,
615
+ "byteOffset": 0
616
+ }
617
+ ],
618
+ "md5sum": "c1c05d00a63fd2310e9277e1afe994e8"
619
+ },
620
+ {
621
+ "dataPath": "params_shard_17.bin",
622
+ "format": "raw-shard",
623
+ "nbytes": 22544384,
624
+ "records": [
625
+ {
626
+ "name": "model.layers.4.mlp.down_proj.q_weight",
627
+ "shape": [
628
+ 4096,
629
+ 1376
630
+ ],
631
+ "dtype": "uint32",
632
+ "format": "f32-to-bf16",
633
+ "nbytes": 22544384,
634
+ "byteOffset": 0
635
+ }
636
+ ],
637
+ "md5sum": "a25e26f7a02645b6fdc6b884c3666702"
638
+ },
639
+ {
640
+ "dataPath": "params_shard_18.bin",
641
+ "format": "raw-shard",
642
+ "nbytes": 25165824,
643
+ "records": [
644
+ {
645
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
646
+ "shape": [
647
+ 12288,
648
+ 512
649
+ ],
650
+ "dtype": "uint32",
651
+ "format": "f32-to-bf16",
652
+ "nbytes": 25165824,
653
+ "byteOffset": 0
654
+ }
655
+ ],
656
+ "md5sum": "d866d6f43426a7a64c32b2d6948a6b47"
657
+ },
658
+ {
659
+ "dataPath": "params_shard_19.bin",
660
+ "format": "raw-shard",
661
+ "nbytes": 32587776,
662
+ "records": [
663
+ {
664
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
665
+ "shape": [
666
+ 12288,
667
+ 128
668
+ ],
669
+ "dtype": "float16",
670
+ "format": "f32-to-bf16",
671
+ "nbytes": 3145728,
672
+ "byteOffset": 0
673
+ },
674
+ {
675
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
676
+ "shape": [
677
+ 4096,
678
+ 512
679
+ ],
680
+ "dtype": "uint32",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 8388608,
683
+ "byteOffset": 3145728
684
+ },
685
+ {
686
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
687
+ "shape": [
688
+ 4096,
689
+ 128
690
+ ],
691
+ "dtype": "float16",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 1048576,
694
+ "byteOffset": 11534336
695
+ },
696
+ {
697
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 22016,
700
+ 128
701
+ ],
702
+ "dtype": "float16",
703
+ "format": "f32-to-bf16",
704
+ "nbytes": 5636096,
705
+ "byteOffset": 12582912
706
+ },
707
+ {
708
+ "name": "model.layers.4.mlp.down_proj.q_scale",
709
+ "shape": [
710
+ 4096,
711
+ 344
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 2818048,
716
+ "byteOffset": 18219008
717
+ },
718
+ {
719
+ "name": "model.layers.4.input_layernorm.weight",
720
+ "shape": [
721
+ 4096
722
+ ],
723
+ "dtype": "float16",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 8192,
726
+ "byteOffset": 21037056
727
+ },
728
+ {
729
+ "name": "model.layers.4.post_attention_layernorm.weight",
730
+ "shape": [
731
+ 4096
732
+ ],
733
+ "dtype": "float16",
734
+ "format": "f32-to-bf16",
735
+ "nbytes": 8192,
736
+ "byteOffset": 21045248
737
+ },
738
+ {
739
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
740
+ "shape": [
741
+ 12288,
742
+ 128
743
+ ],
744
+ "dtype": "float16",
745
+ "format": "f32-to-bf16",
746
+ "nbytes": 3145728,
747
+ "byteOffset": 21053440
748
+ },
749
+ {
750
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
751
+ "shape": [
752
+ 4096,
753
+ 512
754
+ ],
755
+ "dtype": "uint32",
756
+ "format": "f32-to-bf16",
757
+ "nbytes": 8388608,
758
+ "byteOffset": 24199168
759
+ }
760
+ ],
761
+ "md5sum": "cc26b507fe975f21c9131cde44f3907a"
762
+ },
763
+ {
764
+ "dataPath": "params_shard_20.bin",
765
+ "format": "raw-shard",
766
+ "nbytes": 45088768,
767
+ "records": [
768
+ {
769
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
770
+ "shape": [
771
+ 22016,
772
+ 512
773
+ ],
774
+ "dtype": "uint32",
775
+ "format": "f32-to-bf16",
776
+ "nbytes": 45088768,
777
+ "byteOffset": 0
778
+ }
779
+ ],
780
+ "md5sum": "4aa7af8962ab8a9dd9d48986eec4947f"
781
+ },
782
+ {
783
+ "dataPath": "params_shard_21.bin",
784
+ "format": "raw-shard",
785
+ "nbytes": 65536000,
786
+ "records": [
787
+ {
788
+ "name": "lm_head.q_weight",
789
+ "shape": [
790
+ 32000,
791
+ 512
792
+ ],
793
+ "dtype": "uint32",
794
+ "format": "f32-to-bf16",
795
+ "nbytes": 65536000,
796
+ "byteOffset": 0
797
+ }
798
+ ],
799
+ "md5sum": "2e559f694c15d0d40bf21fbb1d025ee8"
800
+ },
801
+ {
802
+ "dataPath": "params_shard_22.bin",
803
+ "format": "raw-shard",
804
+ "nbytes": 32071680,
805
+ "records": [
806
+ {
807
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
808
+ "shape": [
809
+ 4096,
810
+ 128
811
+ ],
812
+ "dtype": "float16",
813
+ "format": "f32-to-bf16",
814
+ "nbytes": 1048576,
815
+ "byteOffset": 0
816
+ },
817
+ {
818
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
819
+ "shape": [
820
+ 22016,
821
+ 128
822
+ ],
823
+ "dtype": "float16",
824
+ "format": "f32-to-bf16",
825
+ "nbytes": 5636096,
826
+ "byteOffset": 1048576
827
+ },
828
+ {
829
+ "name": "model.layers.5.mlp.down_proj.q_weight",
830
+ "shape": [
831
+ 4096,
832
+ 1376
833
+ ],
834
+ "dtype": "uint32",
835
+ "format": "f32-to-bf16",
836
+ "nbytes": 22544384,
837
+ "byteOffset": 6684672
838
+ },
839
+ {
840
+ "name": "model.layers.5.mlp.down_proj.q_scale",
841
+ "shape": [
842
+ 4096,
843
+ 344
844
+ ],
845
+ "dtype": "float16",
846
+ "format": "f32-to-bf16",
847
+ "nbytes": 2818048,
848
+ "byteOffset": 29229056
849
+ },
850
+ {
851
+ "name": "model.layers.5.input_layernorm.weight",
852
+ "shape": [
853
+ 4096
854
+ ],
855
+ "dtype": "float16",
856
+ "format": "f32-to-bf16",
857
+ "nbytes": 8192,
858
+ "byteOffset": 32047104
859
+ },
860
+ {
861
+ "name": "model.layers.5.post_attention_layernorm.weight",
862
+ "shape": [
863
+ 4096
864
+ ],
865
+ "dtype": "float16",
866
+ "format": "f32-to-bf16",
867
+ "nbytes": 8192,
868
+ "byteOffset": 32055296
869
+ },
870
+ {
871
+ "name": "model.norm.weight",
872
+ "shape": [
873
+ 4096
874
+ ],
875
+ "dtype": "float16",
876
+ "format": "f32-to-bf16",
877
+ "nbytes": 8192,
878
+ "byteOffset": 32063488
879
+ }
880
+ ],
881
+ "md5sum": "3026df031148ab773e9b7047ad28afe5"
882
+ },
883
+ {
884
+ "dataPath": "params_shard_23.bin",
885
+ "format": "raw-shard",
886
+ "nbytes": 8192000,
887
+ "records": [
888
+ {
889
+ "name": "lm_head.q_scale",
890
+ "shape": [
891
+ 32000,
892
+ 128
893
+ ],
894
+ "dtype": "float16",
895
+ "format": "f32-to-bf16",
896
+ "nbytes": 8192000,
897
+ "byteOffset": 0
898
+ }
899
+ ],
900
+ "md5sum": "315a4c72ed5633971de22527b937ecf7"
901
+ }
902
+ ]
903
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1d240c485547c1dc5fe60edb6124bd40a53f30b62d276920e9b160d2f3f6c0
3
+ size 65536000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f192f28fe0b2df1dac654a893720dac894b183ca7f996022639c07d9d399c6a
3
+ size 33357824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148f026e5ac30cb7e0d67b7473d1f02dd528f5ef5e74875a36623499de1e0679
3
+ size 22544384
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2400615bdfb2e3d1558b43b9c54e837897f124dcafaf740aa80f19880002c7f7
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f483e65d8eae894c7ef701be0cc022cc7fb5fd17a06b641137a9749cd3fcc50a
3
+ size 32587776
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2bbc6f9e4fb7e24ac3f97434355294cc7c83316937f9f5b50db38a885f3edcd
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca81152912bb4ff35153fe140fdcc6402e79b2781524f200b384fab5569452b4
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d318b44d45810766140f06dbd75e2e9b28f2ec65b016136be38c197e19175264
3
+ size 32063488
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e41ad34d88f0adbd5d682815eab11564a0165ff053c500eabbc1e25297a9c35
3
+ size 45088768
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9056e5105dc6c36431a29e7b5b9b158b45dbb1f83b970feca36e1c21fcc0141
3
+ size 22544384
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adfb0d8729353da77d4bb4ed91234eb63d243c5c9e44bc5f630a566f5916a631
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65680a77b33d317d9ea9122de446104a8b5c2710c1de5b4b968dd78dd299bbc3
3
+ size 32587776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331f0a70a3893c6f459ce0538bddb173cc0da631378a3418f5b631f8b26017bf
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f9fff31727ebd70fa31ac8df31c62d54b7c1e7c5aa6de389ccaec37068a80b
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b29c628c04736036f7f469a8d80ad5ac107c3ae60a8d5937bbae6f8a59113f
3
+ size 65536000
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e9fb4ae2b2a60af9e56020ef97d384034ba2ba5cb3f2932865cffd59e1c5a4
3
+ size 32071680
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66331acaadba40f55a958e458f8a3848113425d3fc1177b549126c7cf709ef33
3
+ size 8192000
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699334ca329e40c43693773ce3c87bf99e79dd1392bdff48259de06c9a62b162
3
+ size 22544384
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fc58d4610ed90f46d0c01b2ac92e67e6c4c9eae333d97bbd7d447f111e500d7
3
+ size 25165824
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b444bf3942d67b47121883c3ab9b4d45bf7a0d9da35c482ced849ee4b50fb8a7
3
+ size 32587776
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2030d627ffddb756f043d4654711c5192417095377bbc2846b41049909502a96
3
+ size 45088768
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a623d91b2dfc3ba674d1ec2a730af6e119260d4a1209f16bca45914ef30991
3
+ size 25165824
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63db6f8491f163a5f31f5447877875c5fc0cf641df22af233705215507ec302
3
+ size 32063488
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a9a522ae49291ebdd893c52b19d80a1bbc291a63a79773ccea5372a364f6774
3
+ size 45088768
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "model_max_length": 2048,
22
+ "pad_token": null,
23
+ "padding_side": "right",
24
+ "sp_model_kwargs": {},
25
+ "tokenizer_class": "LlamaTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }