{ "metadata": { "ParamSize": 165, "ParamBytes": 288967680.0, "BitsPerParam": 4.50113497363691 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 24576000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 24576000, "byteOffset": 0 } ], "md5sum": "633e77cd70c09a1157f8c90051b657a9" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30723072, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 24576000, "byteOffset": 0 }, { "name": "lm_head.q_scale", "shape": [ 32000, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072000, "byteOffset": 24576000 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072000, "byteOffset": 27648000 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 30720000 } ], "md5sum": "dca9dd04b8518413856ec25481de4e12" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32747520, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 3145728 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 3538944 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 9830400 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 10616832 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 10619904 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 12979200 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 13274112 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 14453760 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 14601216 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14604288 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 17750016 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18143232 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 24434688 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25221120 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 25224192 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27583488 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27878400 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 29058048 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 29205504 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29208576 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32354304 } ], "md5sum": "acde503c622856d6448b2219453a9b11" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 } ], "md5sum": "a9b819d6d1241df4d18fc34f3cd297ce" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 } ], "md5sum": "8f1c9f6da63d1f79b9405093c969ee85" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 } ], "md5sum": "ac5aea0e00011b11cc74943af756baa2" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 } ], "md5sum": "7a7cfa6e4a6a344163a815a0a3b85009" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 } ], "md5sum": "43b8567c231fcd6ceb9d289e2a5e74c1" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29208576, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 } ], "md5sum": "42f4a095eccd005499430d3995f1dd3f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25669632, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 8192, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 8192, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 3072, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 3072, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1536, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1536, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 }, { "name": "model.norm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 } ], "md5sum": "8d3a7724603c587b5c168bb4500903c0" } ] }