| { |
| "metadata": { |
| "ParamSize": 267, |
| "ParamBytes": 277996288.0, |
| "BitsPerParam": 4.501665573729716 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 68067328, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 151936, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 68067328, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a510a333a3a765ded99247f2d1305267" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 33234176, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 151936, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8508416, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 8508416 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 8510208 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 10689280 |
| }, |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 10961664 |
| }, |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 15319808 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 15864576 |
| }, |
| { |
| "name": "model.layers.0.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 15866368 |
| }, |
| { |
| "name": "model.layers.0.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 15868672 |
| }, |
| { |
| "name": "model.layers.0.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 16384768 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 16449280 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 16850688 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 16900864 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 16902656 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 19081728 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 19354112 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 23712256 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 24257024 |
| }, |
| { |
| "name": "model.layers.1.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 24258816 |
| }, |
| { |
| "name": "model.layers.1.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 24261120 |
| }, |
| { |
| "name": "model.layers.1.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 24777216 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 24841728 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 25243136 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 25293312 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 25295104 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 27474176 |
| }, |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 27746560 |
| }, |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 32104704 |
| }, |
| { |
| "name": "model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 32649472 |
| }, |
| { |
| "name": "model.layers.10.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 32651264 |
| }, |
| { |
| "name": "model.layers.10.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 32653568 |
| }, |
| { |
| "name": "model.layers.10.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 33169664 |
| } |
| ], |
| "md5sum": "ba4423d42ed86cf56420e7df4e070d99" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 33505280, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 401408 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 451584 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 453376 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 2632448 |
| }, |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 2904832 |
| }, |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 7262976 |
| }, |
| { |
| "name": "model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 7807744 |
| }, |
| { |
| "name": "model.layers.11.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 7809536 |
| }, |
| { |
| "name": "model.layers.11.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 7811840 |
| }, |
| { |
| "name": "model.layers.11.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 8327936 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 8392448 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 8793856 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 8844032 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 8845824 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 11024896 |
| }, |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 11297280 |
| }, |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 15655424 |
| }, |
| { |
| "name": "model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 16200192 |
| }, |
| { |
| "name": "model.layers.12.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 16201984 |
| }, |
| { |
| "name": "model.layers.12.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 16204288 |
| }, |
| { |
| "name": "model.layers.12.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 16720384 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 16784896 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 17186304 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 17236480 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 17238272 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 19417344 |
| }, |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 19689728 |
| }, |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 24047872 |
| }, |
| { |
| "name": "model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 24592640 |
| }, |
| { |
| "name": "model.layers.13.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 24594432 |
| }, |
| { |
| "name": "model.layers.13.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 24596736 |
| }, |
| { |
| "name": "model.layers.13.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 25112832 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 25177344 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 25578752 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 25628928 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 25630720 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 27809792 |
| }, |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 28082176 |
| }, |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 32440320 |
| }, |
| { |
| "name": "model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 32985088 |
| }, |
| { |
| "name": "model.layers.14.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 32986880 |
| }, |
| { |
| "name": "model.layers.14.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 32989184 |
| } |
| ], |
| "md5sum": "9de0657bae9b2c140edab438464bc476" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 33053696, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 64512 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 465920 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 516096 |
| }, |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 517888 |
| }, |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 2696960 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 2969344 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 7327488 |
| }, |
| { |
| "name": "model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 7872256 |
| }, |
| { |
| "name": "model.layers.15.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 7874048 |
| }, |
| { |
| "name": "model.layers.15.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 7876352 |
| }, |
| { |
| "name": "model.layers.15.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 8392448 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 8456960 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 8858368 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 8908544 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 8910336 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 11089408 |
| }, |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 11361792 |
| }, |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 15719936 |
| }, |
| { |
| "name": "model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 16264704 |
| }, |
| { |
| "name": "model.layers.16.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 16266496 |
| }, |
| { |
| "name": "model.layers.16.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 16268800 |
| }, |
| { |
| "name": "model.layers.16.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 16784896 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 16849408 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 17250816 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 17300992 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 17302784 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 19481856 |
| }, |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 19754240 |
| }, |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 24112384 |
| }, |
| { |
| "name": "model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 24657152 |
| }, |
| { |
| "name": "model.layers.17.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 24658944 |
| }, |
| { |
| "name": "model.layers.17.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 24661248 |
| }, |
| { |
| "name": "model.layers.17.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 25177344 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 25241856 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 25643264 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 25693440 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 25695232 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 27874304 |
| }, |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 28146688 |
| }, |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 32504832 |
| }, |
| { |
| "name": "model.layers.18.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 33049600 |
| }, |
| { |
| "name": "model.layers.18.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 33051392 |
| } |
| ], |
| "md5sum": "f21a87aa7d6b0cd2aa64911d1644e7d4" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 33020928, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 516096 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 580608 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 982016 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 1032192 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 1033984 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 3213056 |
| }, |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 3485440 |
| }, |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 7843584 |
| }, |
| { |
| "name": "model.layers.19.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 8388352 |
| }, |
| { |
| "name": "model.layers.19.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 8390144 |
| }, |
| { |
| "name": "model.layers.19.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 8392448 |
| }, |
| { |
| "name": "model.layers.19.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 8908544 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 8973056 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 9374464 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 9424640 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 9426432 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 11605504 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 11877888 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 16236032 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 16780800 |
| }, |
| { |
| "name": "model.layers.2.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 16782592 |
| }, |
| { |
| "name": "model.layers.2.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 16784896 |
| }, |
| { |
| "name": "model.layers.2.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 17300992 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 17365504 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 17766912 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 17817088 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 17818880 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 19997952 |
| }, |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 20270336 |
| }, |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 24628480 |
| }, |
| { |
| "name": "model.layers.20.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 25173248 |
| }, |
| { |
| "name": "model.layers.20.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 25175040 |
| }, |
| { |
| "name": "model.layers.20.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 25177344 |
| }, |
| { |
| "name": "model.layers.20.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 25693440 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 25757952 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 26159360 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 26209536 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 26211328 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 28390400 |
| }, |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 28662784 |
| } |
| ], |
| "md5sum": "b3a65d1e318f59925f1cde8e67cf31cd" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 29211648, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.21.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 544768 |
| }, |
| { |
| "name": "model.layers.21.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 546560 |
| }, |
| { |
| "name": "model.layers.21.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 548864 |
| }, |
| { |
| "name": "model.layers.21.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 1064960 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 1129472 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 1530880 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 1581056 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 1582848 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 3761920 |
| }, |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 4034304 |
| }, |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 8392448 |
| }, |
| { |
| "name": "model.layers.22.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 8937216 |
| }, |
| { |
| "name": "model.layers.22.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 8939008 |
| }, |
| { |
| "name": "model.layers.22.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 8941312 |
| }, |
| { |
| "name": "model.layers.22.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 9457408 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 9521920 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 9923328 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 9973504 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 9975296 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 12154368 |
| }, |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 12426752 |
| }, |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 16784896 |
| }, |
| { |
| "name": "model.layers.23.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 17329664 |
| }, |
| { |
| "name": "model.layers.23.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 17331456 |
| }, |
| { |
| "name": "model.layers.23.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 17333760 |
| }, |
| { |
| "name": "model.layers.23.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 17849856 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 17914368 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 18315776 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 18365952 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 18367744 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 20546816 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 20819200 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 25177344 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 25722112 |
| }, |
| { |
| "name": "model.layers.3.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 25723904 |
| }, |
| { |
| "name": "model.layers.3.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 25726208 |
| }, |
| { |
| "name": "model.layers.3.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 26242304 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 26306816 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 26708224 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 26758400 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 26760192 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 28939264 |
| } |
| ], |
| "md5sum": "080f5062f73941ab65703596de8e1124" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 33297408, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 4358144 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 4902912 |
| }, |
| { |
| "name": "model.layers.4.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 4904704 |
| }, |
| { |
| "name": "model.layers.4.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 4907008 |
| }, |
| { |
| "name": "model.layers.4.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 5423104 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 5487616 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 5889024 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 5939200 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 5940992 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 8120064 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 8392448 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 12750592 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 13295360 |
| }, |
| { |
| "name": "model.layers.5.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 13297152 |
| }, |
| { |
| "name": "model.layers.5.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 13299456 |
| }, |
| { |
| "name": "model.layers.5.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 13815552 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 13880064 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 14281472 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 14331648 |
| }, |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 14333440 |
| }, |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 16512512 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 16784896 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 21143040 |
| }, |
| { |
| "name": "model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 21687808 |
| }, |
| { |
| "name": "model.layers.6.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 21689600 |
| }, |
| { |
| "name": "model.layers.6.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 21691904 |
| }, |
| { |
| "name": "model.layers.6.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 22208000 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 22272512 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 22673920 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 22724096 |
| }, |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 22725888 |
| }, |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 24904960 |
| }, |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 25177344 |
| }, |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 29535488 |
| }, |
| { |
| "name": "model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 30080256 |
| }, |
| { |
| "name": "model.layers.7.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 30082048 |
| }, |
| { |
| "name": "model.layers.7.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 30084352 |
| }, |
| { |
| "name": "model.layers.7.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 30600448 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 30664960 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 31066368 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 31116544 |
| }, |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 31118336 |
| } |
| ], |
| "md5sum": "a0efb605f6b502a9e2429f1d5751769f" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 14605824, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 272384 |
| }, |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 4630528 |
| }, |
| { |
| "name": "model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 5175296 |
| }, |
| { |
| "name": "model.layers.8.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 5177088 |
| }, |
| { |
| "name": "model.layers.8.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 5179392 |
| }, |
| { |
| "name": "model.layers.8.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 5695488 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 5760000 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 6161408 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 6211584 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 896, |
| 608 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2179072, |
| "byteOffset": 6213376 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 896, |
| 152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 272384, |
| "byteOffset": 8392448 |
| }, |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 9728, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4358144, |
| "byteOffset": 8664832 |
| }, |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 9728, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 544768, |
| "byteOffset": 13022976 |
| }, |
| { |
| "name": "model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 13567744 |
| }, |
| { |
| "name": "model.layers.9.self_attn.c_attn.bias", |
| "shape": [ |
| 1152 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2304, |
| "byteOffset": 13569536 |
| }, |
| { |
| "name": "model.layers.9.self_attn.c_attn.q_weight", |
| "shape": [ |
| 1152, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 516096, |
| "byteOffset": 13571840 |
| }, |
| { |
| "name": "model.layers.9.self_attn.c_attn.q_scale", |
| "shape": [ |
| 1152, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 64512, |
| "byteOffset": 14087936 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 896, |
| 112 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 401408, |
| "byteOffset": 14152448 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 896, |
| 28 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 50176, |
| "byteOffset": 14553856 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 896 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1792, |
| "byteOffset": 14604032 |
| } |
| ], |
| "md5sum": "2bd528c473876584a15d9cd587b5d6ae" |
| } |
| ] |
| } |