{ "metadata": { "ParamSize": 805, "ParamBytes": 36381212672.0, "BitsPerParam": 3.4891900844756822 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 1024, 128256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "7a36865a0406b487cbe14127283ba449" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "69768fde9bae1af6b16e9944db9deb62" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "440fc3338b669be29ce35304f16d6719" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "005e19cc0e9651ebe4e0b40b9abb7fba" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d6813cb9add83fffdb2853f7f0cf246c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31145984, "records": [ { "name": "lm_head.q_scale", "shape": [ 64, 128256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16416768 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 16433152 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 20103168 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27443200 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27459584 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27475968 } ], "md5sum": "ab25bcca41e97928efff2303e255d720" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4dde3e68bafdce920fea003dc7e71070" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1135a55917eb657fdd208db632a29aa7" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "5f15711fe19c866d11c78777cc403078" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ff9fc07118a42ce0351be1674057b92e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "db0743c276be8775d4b68d97ff22c0d2" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29835264, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 9732096 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26148864 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26165248 } ], "md5sum": "15a66df20994bfe776624b086e0c7d9c" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0bf003e228d272dfe71a5e4400fd2499" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1154e4531895f55f672bfa7bb74e59d7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b530fdfc24290cd612a471f685098c24" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "082c5994568df37eeb8ffd9a4e93ceac" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "43a53345787ca2cb95a6871c1d726e27" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4b66d799e7e3c1c1dd05e1cb3fbcb117" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "981072564b5974a52705133fc3e85900" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "42bd785c49347cbeecaa5bf8c744b6c6" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "cd330448d9ab6b0eb3c6c3122e59241b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e0e1db8da2b6f30ca458ea7c4f340c4a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9675b3440ea96469f11f505edfa926cd" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7291ef1e639f0dd4dd47dbb50cb05bf3" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a12f04d36fee18919a1dda0525ad3206" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "21ff45f9239d70a5e07778ff938efdd8" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e390f73a834ace9d9b24d9276b8024b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "17caca97b34e403b8c234d2b8156592d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f1b0644f11e31d223c60e3d71964de66" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "151d8ac3346d64771efe5d05774fb3c6" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8cb7b0d256311b03767e91399c338018" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "9aad474b4c0941698e9e2f1cf9a4fef3" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca1cd6ec5932c3e4528fcd8cf65c694e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "454161b05665665f0a94fbaf61471b3f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9e74d7c91151732febd71fc4a8761322" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "487233f46e46bd1d53a8d016d7f7a805" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "983a46b09a806cbee11ec49c9e045420" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ca3c129b8c909e915404830424eb945" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa56eeb0a04d97342a210d627858efed" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "24442681548f79981849cd7a43d9a867" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85258eea206911263979dcbc69a8da0e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d3e722cfb3528a6c6837c33896719ba8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23117824 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30457856 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31768576 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32817152 } ], "md5sum": "087307ed5c0873aba38815150ec709dc" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "50c09371ca43bb4bc8764ddca0deb48f" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ac5149d27fa0f1535c465f04da846543" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "20971b9de9f7caf1d6e9d031834b8c78" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "369c7a8652b52964ba94d8334eef0168" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8aed0f5e46347224cce693b12285a2be" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a25208fad71bfd8f6eba2d1c6216bb37" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "112913eb60088a598c72549d53bdd358" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 28147712, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11042816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11059200 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11075584 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14745600 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22085632 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 22102016 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23412736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24461312 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24477696 } ], "md5sum": "991569406526f33c12df8039928ac32a" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "25824731573fe6f6b5946790e880f6f4" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4615841a5b119d5460fd401bdc3c82d4" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "835af69cfaa464a427a01086f899c92d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "13df5142eabce429e8b6ee53652fcfcd" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c1c4f633c1b23effcc02e532824238d0" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "55f068418f7b255df66286dc4150c241" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6c20ccf9656ee3141c501068f969f868" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f9029e0017f84d18005fa5d70b29a3ad" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "9d55f7a6e62e887cc4b705c50a722078" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b4038b0b085ad4df949b177b21c82521" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce52c0aabada9635d66882f32b297553" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "88a4af0a457289f581c51edbdd98f51d" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "019de97981d85b0d7e11c539797a7d9b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "516f66195044cd7b07384c6916b46b72" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bf79ad1dada03c028dd658e5b7534e0a" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e9fa448d5ed223defa0e72946980e421" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e7b574be15bdef1fa302ebf165a8217c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3011224569c6cd4b0091a3b6868ec373" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "5b583ab28bede41aa5e4f7bea123af41" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1574a857cc7defd62c81d63ffad1b786" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "83c3f8f785240846d9b737df235c85ac" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d735d64fe6f0f92f4aaa0a8f1d6edc23" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0f6d5b3dba6bcbb36f5ff297c0ce2dd5" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "86924490ac7b847a09e6cc583681fe7e" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e43cb5b1442556f07f191640fd15bc6f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fb996b1d3d7fe32a6f616c50b407d444" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e8d216dc2730bcdbbec7070561db5d6e" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "64011bd956b1e77627daf22ea3b78359" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ad400049548ae7c7d915b428a51259b6" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ec28fef67a460b16b524824fc1a45e81" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "61d5c254d9d9bb143b1d601042550df5" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e892b3de1e983423ff55ed39654f5849" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b7a289224a73d49fc8830d1fbd195db" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "51b7bbb01e14abd4b2d670ce0179d1c5" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d4e6382948d35949f4a3b413968e67ec" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7d9b11b0214d82c5bf5d4c6fb07c17e6" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eb2944d557a3d19a0533fc8170eab82d" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ad45abc512b2aa23ca63750a3f58610d" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "f23ab1bd8f46db38dc5fe055fac03c62" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3f2cfc09d805a52c5c6cac745ff34b2d" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fc893aef3c2db0007bba7eb59e6e5b7a" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6ea667da787adfc100a138f5d8df75e5" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e54e42196f5b3856da271b5dffff496c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "022526442dab432212d87373224b5ffd" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6bea2ba246913c2feb1d08ad9586272c" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a14c42a5cef66bc73b7ec396d99d10f1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0743298340a4d3de482f686444020ff4" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "17cf8fa3b590eff33fcf087c5a432661" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "95f25c23628a224f1a15870770d254c0" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "86ee2a082dc1cfe67e905ea014ddf53d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c323547aea86a3ce071f48e36e03c313" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2c5da94a830eab849b225faaf3af3f64" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "79ce7b4872653ee91f121f2673756207" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b2875bcfc99333213d7eb4f5ffbfc68b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "514ba5b62f0c84df02628da0a4be0055" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "29e26a6183d25ea541f85115ba7521bf" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b433f827ecd9ec8b03b7b04c3ae60187" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "8267763ac60e5d8abbac6eee5d2b0c7a" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "af696bc6a943489c2035beab4d5a515c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c4376bcca74f48190ee9493bf8533f1c" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9b2b56fb2e7eec8edcbc362fdbd9eb8f" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "689fcabdc7fa7a6fb184e87d841aa7b3" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "95680c38d5ca21a48a6a13b38922e8c9" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "da20995424d70933e4be1fe098d42596" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ed92d0223b080b6a57f4b5c87e60cc09" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "955454ab345e3573829612d33f7e525f" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cd592b500e2c7eaf2cad41466f0f619d" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "1a0a3db508161aaf7bbafbef0d628a2f" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "33ea76b621904ebc2c99cce02c8ab87f" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5d23e25c2b37aed2d40034e52d940b17" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b730ce8123f956c9aabe489fb53f9954" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1f974f22d6c962fddcd3cdb5c3b39f83" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7775d1372e59f4c57db6ee2a8ec2253b" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fea2550d808a7507447239af0b8bd213" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "33687b62402dc3642d8e0759f7d59b57" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "99ff8ba56917f9c9152c4352281c9ccb" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "a8f9cef1172b02dada45bd280a16deae" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dca4187a0d714fcacf5c133b8246993e" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "684802a2af97efd3380a09ac6b376d34" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7d1ec5a8a99bbe61f5626263839c00cb" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e9303c62383db52749ac07a3de7f3475" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fc289618141aa9f88a9bff045eae7177" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ad5a35d171123f41087a232b58923417" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "20bc0e71bb5b5055f84743f17121e62e" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "96570acb3d930e80d2f40b3563b11882" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "13ffac354cfddab5dd48d539e3fbba42" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "37349846fd3f85bbf746543ef020b10a" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "97e55586bb9d5e0d6843b97ef1cfb441" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2f8a3abd96472d0f188c5d9fa7b1a6b4" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2318b2838053e47134ddebe476debbba" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "726f4e6f45f7befca620d7fe1508a6fd" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6eb6bf5f1865af3e1f5f02532583d968" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8f70864231bdbda259e3f9b330023b0a" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1d6f5ac71361eb8e63219eb97af0e6fa" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9d8eddd8b787cf08487e8c5c7c2667e2" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eea9f53c460d5b07b51807acfa59bfcf" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6d6b81806f28eb920d838e3d612501f5" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "a2aa29e297ae879f49cb0eb6f741bce1" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "27c974b02cd027aae3423e0f841daa9f" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "387130f0cb0f19970782c3109e7085f9" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2b2f7694e2b9d90beda8d97b05850535" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b4de20f5ed18f2080035e0cba4439442" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "509849d93270b662bdec1d40425cdc71" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "daa63a7ad1b1d6a7bbb9d311ae352150" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d484073826c311688ff18be65c20ac16" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2026025c9a602c7244a41e76e6fb0d98" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "a4d621bca949f13ae71af578d148019f" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8706dc01b53bb1fcfcae6b2dd454f4c7" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "34ddb169cf6cc87e809012545447d4f9" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7ea8cd74e7a91e078b82388e47011dd7" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cd832853feaeb1a8a863260d80bb5c18" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9a13f3877fa7ac3cd3f2c136c77b0ed" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "70019a2619f5cba878e531c634919534" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5aa6b48664d5ec4b34e970d994376e8" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f9473d19158bf6e6fca0370f10e94045" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d2c98e3c1acda19196757d2f5e1e156f" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "1b626f4749185df5be683f0297a75b04" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e6b9336e2388f161272f86eee1d18d46" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7462d4098e55896985ff3d40c87675c3" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bbce13e2c4fc0e7edafb6132700d477c" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "210ae9095390f410876ab21170829fcd" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dc8f3df0fe8e9bd486d27e43af6c9132" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f13a178ddcd135355e324a3fae7bc02e" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "40d8c1df44c1310f1cba3098d416fbf3" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f18a5099ce1aa6b28f8ad41a70152624" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "e7728a3e2b64dcb9b2fdff111f42861f" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f75ad5adcca056c36dba609613ddc646" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e4847ed2752a0d88bfe00c01d31b41c7" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8eae2c7a9b0e3f21a99f11ffb3f3a0b9" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25362571076cfd89f8eb9e1e2ee8e0f5" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fee1d9b6855d7f75f5af6296a5d10fa5" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a7246475176b7bfd8c2e0c0446389197" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c5ccef92fe25482b423bc24581253ce0" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e8a24e8645d934e2b241ae493b0c17d6" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "70452df3ac3b1d185699072dcf6186b8" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25f8d503f3ddbe28bff8d6ef2bd0e7ad" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "43095f273796ee9b85e2b179aa8aede2" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a7e7d1370b0f5e2be6b3a58b33302513" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f7da8800bdc95885f6f3e9ba9259bebb" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a034ede99248a84ce49e5946e0a8cfd3" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2dad36baf2b0b295123490dd2727a954" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a5ba2860084e333c16f0066d671e32b0" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5591ef0bdcead20e40c6467b70391a0e" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d1ae7daf381206d1e7d8345ad0ae270b" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eade8f1dfc47f6e1935837ddc279c4c1" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "52c7ce2c4d9898ada800082c789378dd" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca13fd15d4b79a5a0f2d6fefeb289663" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f405682681fc944fdd1b8163f2f1558a" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee1e1552f087f3d530d5785ed65edf1e" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d4330f2dcedc6155725986246db1751e" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f27f28e913a2b1d41b51bdeadfcd8612" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "819c83339b91860609511447bf48904b" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "927f1bae9b4f347f06b3533ff0fce576" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "431ec0720977638f401f947c25cb506e" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "b41a00381cc945367d02b5fe4aa6b430" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9094ae88de8871a98f0e749af9e30e53" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "224658ffb866669d5a52b72d861b8c3c" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eb83f90f6c83009e9585aa8c5bdd8fa4" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a61dea092a524b7c16aae8f49ae66ee3" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "18f8322b0a72b691630043a537f2f45b" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2c237b465129e2a5bea2a16717a1491e" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "12f02e07158e1a351fae55352df8746c" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "808fcfd8c57f0cac8e148580fa3f3de5" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "207e00c3e51d696eb8b3b690eb812d5d" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f924b8f711dda546236c9de00638338a" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "dda73ab58724536a0b62fc22b7fae159" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b7a74f7f2d4d0eed4e1fe75ec243f886" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "332edc9cb81aaccd5a6157b8f8c06729" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4dc9ea59a0beba4f227acc228f389d34" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c5572b6617aa26dde93e54e853bf6a90" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a9deab60ae49f568025b1e496b0bc09f" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "399758a7b9c1e88096565d83aead9feb" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c87787bfc1988b5b0a59c7b89bd84f7" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e09df942e93b0909c7ad1fbce3c8bbb6" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "7bbf92a92ba2022e72c012cc51b17ad9" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "98e8471d0b676b1f2d11dc8746441d80" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3a8c176dc9cfb764ecbc2453fd56f0b5" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a215952c16f1dd95f92c6d79ed074812" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d08179d0aa4a0833f875c35ebf4426e1" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5143ed308a15da90e64b09a818acaa0b" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "46965b9d9e0a7fdfdc9c6c62187fef67" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a571ef0f26bf2932080410d6ec6571f4" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "58540fd036bc77f072bba66f159d0be6" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "86794bdf7d81278b77b48b26c88c0e37" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9a89bc71aa8bcf34c99c62bd054ddc6b" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "074bf4fe7a535249d391d94b12ae1c48" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "145919ed13f3442ef176ed740c1e0b8e" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e0482a67ac25ef43a7432bf1c7dcac03" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "aa8f9ef1dc232564ba9855d2a8af4ce5" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fa817777ec16553cc74cd9df6c0cd488" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "68b92ceb4eade8d778e9019bffd26f40" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c73cc2f4fa99410cb095e2b01bb59c51" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "68a821906a394d7d9308df13cc3eb0af" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2422cf808c3a4d8327213608ffb6e996" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "9a43b736b64f03754cf4eefa12f81df0" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "77acc121d985e543d01d13ff4c924eef" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "51794dfd107e49395ee2b7dc9e1dea47" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "22902b7341ad39cd45b310d01e4bfb56" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7eb8cc09eb1260483ce750f60322a694" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6b28da5ce6a973b031e8106824d564ce" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "663f807210f2bb40e4787496d000856c" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "460d8c7ea0d915b362ac936b8550d811" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "308df168ee5ff9680c21c78ae61c90dd" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "41b8edad40fb93ee273831e6899c6dd6" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cd45258a7051c00244be4266d3b63000" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aa411e9c5c61fc379b40443401ed1e38" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "db6f3817dd7c3676422e09dfae720445" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "68648cb8af11c9c285690784b641224f" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "25ef063d725c1855168cd1e9eb8a312c" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e1b7dc08918281851e11595e2abce5d" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "15e2ab7935d4aad57fa076916876d2a9" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "af706f028705de3cfcefa6c0f8d72826" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a965bf59ebbdd57e3c8e8a2f7664d1b4" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3f8b19b8e547b89466aae4d48eab424d" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "409c26c085e3368da34e5d47580108c3" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "05c0000c41734d7eb02ebf28082b825a" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3ea557f51ce35aeef1b6db7bd4586594" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "54b93cd21d29aecb9cea2df9eb156e5b" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "60e13f805450a3978096a03750ad6b7d" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "423a0b8c3841400629b9014676e11067" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9515e05a0629650f7ff400114c8ee60b" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "83fe86e4012f5e845e0ddfddc467635f" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4f135cdbaa8347156373fa1f67cd247a" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "b700c77071961428863c1aab56ace16d" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6862f33952b6aa162f11b37f502ed7dd" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ac88b9ce8878219dbfbbacd95fc2856c" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "682400599ada66824c6f551ed5a07461" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2d5ca7a16f5061c967294e6e8eab5d52" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9f857a01b2c3af60511d76a74ac7d1a8" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1669c065814af110b6dead536ad4311c" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c4363a7c3b9e837d4a73b16184da232b" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d6ac66320aecb5164d7d118132d3cadd" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "70db353938537ae5215b007715a07cb7" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "50d1f7185b71b1a8f3c395c1bffce030" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "3677ee5f0db0bc5e88232b7de21fc208" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6ccbaa832248514dd89b0541326b3a85" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d7da20b487d35b7adfc29351a109afdd" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f9162ddb21fd0002b1db6f8801fc902d" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "30afc00d5031eb55dd0b820bc42f4d89" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6b09af1bc5bc7cee67c8d0e16b1f422a" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d3c9ced76b7f2c95ac9aa15ce561b624" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6ad5f518925fc24e30e4162112a531e4" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0c91a9d3ab9d0a5f488c35a6caa238b6" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "55733ef96c7339b886d13b379ef646b0" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "427eeef1e4a46851e87d0d694d290cb0" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f3cbf9ac5a4abaac6a0a7ec132db069a" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f63b9b43fd3c023808930949ccaaeffa" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "45ea5db9c54ce6b4643cc1611ab40f6e" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "47d4085cd363081342bc85f46645e580" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "23337556db8895775de0f738e0d78d9d" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "df2b06468084984e6f1d6eb0fecbb454" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bd622eed25c4543ba4dbd5c7aa31c476" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "7c4db2942b26fa0c1afd29e1700181f3" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a95ac717140f44ffb7910601a17222ac" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "47bc51935d95277e7803763cc6b717b0" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "954e44de1abd7bf323573898250b45c9" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b600e7b6a3ffab064ab19d672dbef41" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8af4257f9d00b0f1175def70ef52107f" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "121b3fa93845e895d312ba148c3a6489" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "edf02da016e507a5b11e83804e1039e5" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b2eb20f375f57a7f0ef8e2ddd4c654ce" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dc92e27cf86ed63aa1af1899780c461e" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f3b8427f79cada8b720149d09b4a4b6c" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "4945eaf0731c6f309a2467cccd16ab04" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3885cffbbf5115d63c02357cee2142c5" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3c4957f55568005044451d6757bad228" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1ee07aea0e1ed202bdfb0b2fca9ef08f" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b88108ff0e190f2c4f4425e5ce1e17ac" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b43a962edaca17e135e49f24fc972491" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "09bb0da60ceaac0028cf3bad33ec790f" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "41ec8a6bdab014c210cdf07a3dc555d4" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f3c975bdbf35a7f7e28a64b8076d3aca" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cc41765e045ec63c0c24ad074dce8359" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8b9bc4c562e8c722c35266a1a8f524db" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25477120 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26787840 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27836416 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "2c380c5bb45e51df1a64a3d39b77eed9" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "71e5ee2d3edeb313ca97a39360795791" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "37c6d580d5e5d805f94bbbe49dc83b15" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e0c2881ffe4cc0e071ba68b2ce8ab911" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6ab738baada234c1f2451ba89e1da206" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e1e4e7e8643d6cf96d120da75b560c35" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f03e89098eebc9687821c3ef7b542b0c" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2f2e0a0840eee0510ee05e7894230555" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c94dbf3422dbc9f80acc89d522627197" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "c3646cb244378b0e2689f2b5e4442a8a" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "49d89cb6d07b702c9586557b25206be2" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "98a4aa3e298f86be7115a0731c114c24" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0e10f3a1254d4dc714349cee4c98d25f" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a73805b352dbd35446dcc8f581fe6af0" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a22c02e9d0815f94f85b8fa503fc259e" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "be68630d018b6a01e642c9295ed2aecb" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3e72c7fefd45561e64e36449e6dbb739" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "203f3f9c6738ee432159644d28fd3c4d" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ff5567fd97422405abd337b9d9fc96b5" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f4f3f63102ca8e6412b95cbe8e8a1b8d" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "401891e5c86c2c6ff4f13308ca3bec73" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dcdbbcf2a527c14024027d574ea7ae74" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7fc50356bf084882b82402367b3a99e7" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f26bfb96b7596c5e6f5ffcf07f585264" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eb227316537604acd0cf843b025b5d8b" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9720f13d2d59edb5dc3da847048a5b7" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "468f8d011387b8ca944c61999074cef8" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e63151bcde52a771d125ef8cc7c6a265" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5205481aa0ebf606d0424dde07598118" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "19e9e0c7067c3e15fada0e1be6f53cb8" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9004675ec587bdaa2e7eeee3f01389f8" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "10c504bed73c63cf79639cbc25e38c1e" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "afb50fd18fe8d35994e9c5011e304004" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2876a1d262ada66bfd701afd54afa068" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 12075008, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 } ], "md5sum": "ad885f38ffe9818e1742dea42bba9698" } ] }