diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,12343 @@ +{ + "metadata": { + "ParamSize": 805, + "ParamBytes": 31776318464.0, + "BitsPerParam": 3.04023285660184 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 420679680, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 420679680, + "byteOffset": 0 + } + ], + "md5sum": "2dd985bf1f712f136f39c630cedc734d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 52584960, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52584960, + "byteOffset": 0 + } + ], + "md5sum": "e9321f4b3a03806c179878cf4fbe4670" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "81aff582211335e6eb7325f43466f048" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c3a9257820a5c81a4634d52e56239a9b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 23560192, + "records": [ + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 0 + }, + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 16384 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23543808 + } + ], + "md5sum": "ecd1254ef70dc32644a94f4f328fb54f" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3eef56cdeb2b88bb304c9dbfc9b2df90" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "b91b82223c6ffa815a20cc99e412fe06" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "22bda3ffb0d765c538091637ee8375ff" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d8ec0d2cf25af6dd72dd8042e7861506" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 420679680, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 420679680, + "byteOffset": 0 + } + ], + "md5sum": "b59ba6cf25cb8ddeb61ecd7282e3ada1" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 52584960, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52584960, + "byteOffset": 0 + } + ], + "md5sum": "b5f743dc05d728ff752d216de7147ee9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "e09d3105aa3ce538e44bbd2dde86582a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "6c0737273482d6bdcf9549f8ef8b1187" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "64786d424b4fd771cff19ce962f947bd" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "76abb8165cd6abacca17263d08c0909c" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "9d810d0392c70d6250623855757b6eac" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "6f93efa33de64fbe5abef5e5de387a4b" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a4614a02da599d3bca0f69ee9225becb" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e05dd614151182b8e853e789af7159e2" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "c3fc59065cfbb32d8ea727d789a3a325" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 3358720 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 26869760 + } + ], + "md5sum": "1ea11773b46a5d7cba6a7f329113a828" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c2615a072a3680593d4e7781d851b44b" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "489d7b6cf340a81a2f4bce2983f38854" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d4f21169dc97e5a1d8c25081400e04a2" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "5d72dc17de0add1261c80c80f0fc1ed6" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "9638b13815d328d4212a0ae2af9d34be" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "7723734e112600b6149a68563d89943d" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 15155200 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26902528 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 26918912 + } + ], + "md5sum": "30335bc2355d8c76927d6f74401c7209" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "55989eb8ec09b44769d3fb14003570e6" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "1e53d99ccf85c1161fae22d0c92e9722" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "1a9c96e12c4e67c55b95edc51c94e5a6" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3bdf0111450615f75de3b6beea27e9f5" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "cc4fd28d4c609ba7fed8104bd4d903ea" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "2058e6d4915e47c62f84984abbdc9d2c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "ae3ed7e10aa9dafcc2f4b801345fcdbd" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "8eed8a811d92430a116aa83bab85c35d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "498500e7d619655725872666b9f9fd52" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "da87006ec6bb393fbc91c7d2c0353762" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "648a275c6013e57738812adab23ac177" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e6ded6642dd132b4a335af0f9d25739b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "fada5417fb587c136d4e8f5932381bdb" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "40cc0bef5cdcea025fd887a73238cc3a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8290fa7ba9a97ecb8694cdfb9345cee7" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "bc7b1e0b4f96294a23882c6c1fee88ae" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3796f0387dca1abbc70493876dc1e186" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19337216 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "58caae87b880eae83cd5f432be993588" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "c48a3be5fea69d12e831bd4c36da9705" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "83e96c1c0b454436c5ecdca526a544cb" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "96a760fc6f1a93c9b03f2af1b8b1f6e6" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "f8521e6ca28cc7494286fb24a72812c2" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "18a1cf3cc0c0c5cc1a602c0ac2740b09" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c1d9336302d64d855d6e631b75aa0d7a" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "433753bc5c42c42432235f094fdfb88f" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22712320, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 3358720 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 7557120 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 10915840 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 10932224 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22679552 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22695936 + } + ], + "md5sum": "11eaad89933bf469d456f25f9183de98" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "90c4ba6b65d6cef760f1489e07c39a20" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8d3e3d223313f93b0e8a0110c263af97" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "04b73b2006f0510ff2587e0744b0b75b" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "4b8958243f9d275a983e1b07157a4a4e" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c4a0a27a05e09791506d90c29acf9a1b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "bfab374553352672600d27d07ad21ec9" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "66f2b1be4a92027a6469a2445c28a6f9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "04c87fe48631da1f6761b83a92deec89" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "2ce179596ef053aef8f53a76ac3b9ad7" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "489134828f6f3f28b7a7bf9abbe379c1" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "a7ac6ef7d0e645671a077c3067d9e449" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "400a7320678042de5caabe44f091dfd7" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a4d11216fcbcfea2ad12e64606ac8243" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c2d7c15c52d489fd647c566a1d045563" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "bad9a26b4433892985dde74e31f4ec6c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7870b2ae1855ee3cfe15c5ddcb6c7ffb" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "bb1db3648fa850d368c0c6c2c6746ece" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "a20d167c8e51d9ba62e6149aec8653cb" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "bc422b30d236a205518fb105e841d7c9" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "54876d25ae52b6928af06f93a053530c" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8dd7955b58d1e55d76a355d2a3b28945" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c09493a61693d13d51ef702d53e48a13" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "7705da9b8be6fa6adabfb2bde8203061" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "fec8a4e7977b34dd0746600620925669" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8d97f183ea181245b66faabb40902335" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "d7a30e5409c2b5fe6bafc06c7890363d" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d1be1ecb21fe99d5ce3425cc3382bded" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "dc768d6574513559b3e85907c63840e9" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "0fcc0138743ccfdb17bb07ec99bde1ab" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "617720aa7077007ea16188b7e82c8894" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "18e1ccf6a2193e8faed1b8cc582d6e43" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "2a1d653e6e0c05e2ab7066b303f2460f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c06837d68b63e38a9ed1beae78b0b4b3" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e4bdc9c67ba4bf824facf739a1b7d289" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "4fa864ffb1820efde00d0d13165ea395" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "cfa3f7eaebca9723b006f2a65be0275c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "2841b2498a8b71cfa624ae247ddbe477" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ef47e5ce850bc9a38d8e26ac28c0cbb3" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7f424a08d9518cf963ae83356e7ecd16" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "a694a5ea096d54db3d5cb2da6e59183b" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "12665587f2c68ed529454ec4ceda2eeb" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "f5dff7072be20b233f96e344752127d4" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "b420223a62d70d1434133840d10429b9" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "f2d919e9e3b83e8401340db8a328c59c" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "dbfcc7c2909af9419c2f44cc15da3a6a" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "e320c4c8b53d098497ee8753c045d39c" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "2eeb409d37c9a769e6cecd795b5a6cff" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "0d6e373f4aa67e19f6df37ad7efcba53" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "2dc482cce4504e150376f3b352118704" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "cf8286e4058d8ed5ad26fe72c8b27da2" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cc32736d1b05aa4987fdebb12417cb00" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "0f8545391b3fadab013a2148aa96fcc5" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "29f393c3b84e2824489260a502661cd7" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "bea7d0cfb9ce9238ad75f11acbbb0f14" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "367edcc41e1d788d8adb5507a830c66d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "b1b9fef412d17ba986f5c8bd696561f3" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "01a0f086115156e650d2b3c11bc6c9a3" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "3ba5fdea7d02d42ff0da87c944e22593" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "08ffa83e1b06b9be91249f0b613d2b86" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "b937e79fec4f7c027ce8a57da86e9b69" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "15ea022d907dd3cd4d5d84f494be6352" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3a78ee3daecc6ad28ef5c722e3bbf4f6" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "983fa27548f12bf8ab28ccabbc928303" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "9f0e3eb538a830885c53493499869883" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a9cf7d87045e3a274f944b4b52340528" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f08cb573a488ed8ad30261089f18a32c" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9c76c3a937e88bb5b46460dc988f74f5" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "9fbb4a021af95dd86a17de1272fe7cfa" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "c658da5c8acf865d0a566e0fce9ab4dd" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "7150f4043b2cfbe4e59a1775f9d782ff" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "e4cbeb7f3df78e2dc188729a50b4b407" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "377070663b3de24283d841088e9dca54" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "27bdbc933a9f79b617e6d297cee1920c" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "6d3b32fce307b8aebfb2f790f333d95f" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "016e12c4785c3a7011b02c80ec04702d" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "44b2d1302b1284a8fe9a13ea3ede10da" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ba611304c99040256165612871fb2028" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "df9bb54fa2501c01f5506bb7aecbe65e" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "eaac9901d6a8eb62b941f87314d2826d" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "ed682c3733c291f5fee8b54700c4c1f9" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "812051b64726cfecc21031a1da545572" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "a142000519593bf974a55aba53008c2d" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "621bc719ae2c5de2a2e0ab20492fac63" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "97f9465f54a17dfaa1bf4aafde135969" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "68a93175476deca15cbcb9f55f40d24d" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "f57e495ebdf78e291f0f750ef980ef2d" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "a27af753635af6fddcb11bc14991d956" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "9bb7e5c0564ba36c25b1701fc78b344a" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "65d01f20f54f6584594d3ad873f92ef8" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "c72e043d1d7ad4d0b977db951fb60f7a" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ed78eb938dad99e3541579186e52bb15" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "a6f50b492b87e355b9a83a3518fd8919" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "b0de754c69cec7ed55cd67e05111493e" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "b5b8955bb9f75b27a5c8b35888864465" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "6e8c3f86183d1fe81370429df588e500" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4aefbda2f0fd20f41f52464291601613" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "7505ba54ef4fc246df22f362b9a36dff" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "24a6a212f8c1d17113a8b86d913ebd54" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "9f5307c2fe6afd78c2fb0631ef830835" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "157a40701fea71f1ef145a3591d6c792" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7de91182618ed0a6cf074a7bea69b33d" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c5dad0da2399f38a428e1674a152d2a3" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5de67f15b2ea25de5c5b3af55a17908e" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "e3bec31e0e63a02b9594336a97be77b4" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3cde5caf73e48173d5d6d9fe99f5c30c" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9bb82182e5c9dc9df722fded67bb27f8" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "49bba5322f51e42e720b192a0c56c627" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "8dd51aa134c12ac7eb5bd2dd5365223e" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "70ca315f474cd854d1c4d0fa2e1975ac" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a20b81c204b2f538b3a45bd565bf385a" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f2d0191b59c7aad2711c5ec1738a8e8b" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9ae5a6a1730f9f6eae3f6b434292e035" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "1d4b082dc5696d895e0b3907ad0bc6d0" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1665aafee01f1a754cd551449012db2f" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "b917de1e91eacbb66b07092786fe2172" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "3210de5411c68f57362e8cb1663863f3" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e3d1c0c01ad3e495407d1a1db17ae019" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "3fee5dd7b684aaadcdae91691a8b1a94" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "f1e7170151655fa7f492a229c0df60f2" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "b11f92907358fb38597f59e3759b162c" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "0c481e06bd9145a0cac3cd8c26bb6a0c" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "4125037d21152f34255d77e1b6548a30" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c614640631b289dba8ff8e1cea5154d8" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "0c1f7edccaf97ddec0f0d85e4e9d4781" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "224580ce056c7e35f5d963065ff83197" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5ba66eecc6bd0cf1724227c09edb4707" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "b753ae6a1e6ffc1c0aeb0b3642f58752" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "0394f92cda1fc754e5ccd5ff7e3ef3ee" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "f4b64535f8ff1d0df51b4ba4d3730b20" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "8f96c033acd73173a5d0f1d518272c06" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "183d991813178464a98a623057a47c27" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "0e48d582f014aba0271df39b56ae5399" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1a2ed98c6a5c47d6692a584cb7c93a4e" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "bcbdf757be755731408047defd04849f" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "d0c10b2f4fa4720b3316ce95b3000a35" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "6ff0bdcb5313b1dc887f910ebaad42b5" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "21a771c9202dcefeaa3a88c93bfe9e82" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "12d89078266a1c81c4829f5c2ae0738c" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "0220ba7678c15022bae8ecfde3170ff2" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "cbec37abb544d39799cd2284f76e02ae" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4e36002419848e8dd8a09aa494400ade" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "2ebc1020c129d9c5913382946316f67b" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "18dc668ccb9bbfde6d662ebcc19c6dc5" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "5420735586ece127dc9321867e299feb" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d30b1fd8aca43f2150462f20eb7851e1" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "d4dd3a7ae925fd30931fe22a72ee8ae7" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "2af7cda2d8c2d2ed3c4b32433a5fdbe2" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "f42fab56c72ca08ea031081dbe8307ce" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "672b3c3e5a26fbf0f3a1cbb6ff153bd8" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3d1f04f2f781d1eae42435b2903fef17" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "385bb2584bc2fc4d249c71673c614d00" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c6687327717f84b381d5a4438ed8f9f1" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "4e765844edc9c7a69a1cbd446f657004" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "5851dbc02dead406b2aaaa2a120b914e" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d4f2b2bd20daf485cb5a728d287480cf" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "4364fd08f9028e05724cb2ae371f5cfe" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "0992d734194973e9bdc5d6dcb00a580b" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e2e9d2a48f94104c9954c1412c30d7df" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d336dfeac2cc54a74acbdb5b5bc835c0" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "521220305954565ff05187d935d6b37f" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "0fea18febf81f667b4c4e86fb7f14c9b" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "f7ddaaeeeb51ad30c1258d027d58214b" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "52ca54dfd681b97ac7ae81c9a99e57c8" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "991a1997d1e3a954b1f1051bf930d313" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "4b9e9bba32efefdd565eee8bc3eea968" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "1a465a11b604ecc8c706611878cb7b69" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "817a46ccd8e043f89c97897d56ca3e4e" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "2654a05e477a45d1a1e5f568dd77d9e5" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "7872709c49bcf64a7dd791a898d09ac4" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "b0a57b8c72d315526c5a595ed189fd63" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "01ea6839e30720e40089ab5881b0e5c9" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "39a9d3fcc6b90bdeef4b2195d6e0b40b" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a742f0f2521cfa0a644158ba83e820da" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "e709c080d6b2f7e1e101dfb61d9ace81" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "bcb8d39ad513f01d816fc687a6d4d688" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "6c07ca0c6f9b4c3e5bdb745c27fbdfdf" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4754816344f896f86767498aaad44882" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "954cdec37463577e7b5a718972b9e6bb" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "33a703a63cf835055c258166cc98552d" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7e408a959ec304e6d59a3786dd854ec2" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19320832 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31068160 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "3ed58a2b27f2677eba9838376323493d" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "61e0f64180157032823a3b266f990e5b" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "37f1f835ed3cb06e215f73f5b0ed50ad" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "761de693d7f835d434c34ff4fb8586e6" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "4e07a2ad8809494003b0fb05f817ff24" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c06f3047718a153be4682ac111998991" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "2046dd817101791efbc43d36dd73d3fc" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "ee6dd220ad371b568fb5ea5b4c5651d6" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "71dc43acee7752e1a4cf630b64521a5a" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19337216 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "c23dc6471d6da16c6a5e75b8243b050f" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "921366bb7b14ce8d8c0f2398d084ccfa" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "2f95dbfc3baabe8e65b23236b471dd9c" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "8dccfe47bb78bf6fbd8d3add1a2b1c00" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "874e8b3414223c0cca1352ada3d23696" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "64650be0d5ce2d55c7c260c3fa79950e" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "adafe9aa0839f572e64815b768860be0" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "9b7bd742d747ac24e8bf76dade6d5393" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 22712320, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 3358720 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 7557120 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 10915840 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 10932224 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22679552 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22695936 + } + ], + "md5sum": "4a31044ef3530a88e6071f77454dad93" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ddd3a1c5d7e09c5bf2b95cf918acc123" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "de6c1fb527370e19e2e41c14ff24882e" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "dcecd59bf59436062784b0cf02869217" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "e0d405acd3457ddc9e98a0ea51980452" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "bbfd85a99502f091441a7326545bbd61" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a82ba6d1fd2136f42431b86c63dc5aab" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9f996c05c649744065b8d7031164429e" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cffa008c7ee89782300d4b5621264cd1" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19320832 + } + ], + "md5sum": "f508f58e8f5e0f243987df1f7cd83cac" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "1fcc295f78fdf674bb0048484bea97a9" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "99290a4369daf37f60e38de361edb43e" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f79e051a484e519f96d4951c202e09b3" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "a3ac7387832fe6ae0163dcda41c81036" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d55f5e17e7367bf76bb8b1a3ff967977" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "cd3437ba9b0e622fedd18486410d39e4" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "76293b082edeff9f8a843700788e09c7" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 22745088, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3375104 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3391488 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3407872 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15171584 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19369984 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22728704 + } + ], + "md5sum": "f9d43905553c6f2d0c58e79635a3af39" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "287b2afa1cd7495d94b256d9a03649a4" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "fd9006c3c336100f1f1aed2d8002f5e9" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "bec8b8fd67c7359b2638a5836b7ca398" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "7091b7be181617352177fb28230671f9" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "106b0f099661ef8de8477db075fe543b" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c2d79ad732501993b7efa49c77026e97" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "83ddab4df74bb514e91be38f677a168a" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "2735778604f0ceeda5707f32895978f4" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d9e40bafb9fd186187852c24fbed8111" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "9611f299165413dd21ec751793ae5586" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "e26305bf52bb73634d6d4c80c534f215" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "399075db58968e9076057bf53613c836" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "6fca6946a1f36c78c5d40109f290b3e1" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "15fe4436441c1e07ff31bc872a8e89b0" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a33c1d7c7b4d3ed4a28b825d832a05e6" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "99d06bd11751d72ca26261ed3584b9ac" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "a41f6473d1735ceffa7e921ccadf7165" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "03cdee1665028de3da9e627e0f5bf0fb" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9c5f4804b2a5d3947e650bcb98275984" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "274e65f2a2b67c0e43478f85c1bc95b1" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a2f6efcce2511afd4916e0b3756541ca" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "046941ee145f7fc5e4fa860aefec1c4d" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "b2ef75c75c7084037ec0bba4bc9e30a9" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "22f31c0ab08b8229e9d382850bfaaf88" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "64ec5cce934ad9b02f461c4e3bba09a1" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "2281cefd20f468d1fa90fdb1fcb99892" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "63d053a6a4d91b876662c6f3d5c0e6ac" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "4d41976521e5b5315067708dde954153" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "64c7066023d153660265f6b10631c6ce" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9084fc90c3e58b5309a56dde1c9f96ba" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "05b385ccbbbd44c0c7743266232412b7" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "859ae94b0a7f64e959327794833acef4" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "88c500d958af3d5b754026cacb00a1f1" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "90be7fee0d3fb485de33a389294a5cd3" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "c5246d5920bd285198f07f8ae2798f93" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "fdb23d15d58f8a6715cb50f63d671b0e" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "07b370d7086be05c278c28ce775d4014" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "f390eda09898fb3f8fb0aec590f484af" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "cb29e19803db0aa08267bf4931116b8d" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a373627e7b9a394cad64cbfb33c81d39" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "98f5ff8ba15a055d327e91e5a6912869" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "a051e151a4d793c2a455b76c542c07e6" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "86a987bbfb8a348a6bb0c89083a1cf73" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "eca106ddd53c90fbb8c1663ea3e6535f" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "1de9f438db75b8760a372ea58917f79a" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "460af39dc05b9dca25c01dd28fdde770" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "608e41fcddafe4d3fe937c0229ff4798" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "dac3ba5521396ce0bb641f43288eb5e2" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "c5926f5fb716bfc0ba28a9de50d75641" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "92d04ca9bee9ad721777b1410b586bdb" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "a25147abf0016d425306edb121d51b44" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "351d29ab4386e56e322cc3d6cd286054" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "4e747bc53d8c7836599899681b38d7f1" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "4175f582894e88ea13768e838d7a48d9" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "264a51b8dfccfb7657ccb8fadd21b4b2" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "68d47891234abad20d2d9eefab61adfe" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "91573f7d4c0743a6e9aad28490ed1981" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8281982436ee94a9942b9d00b7495f42" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "b8cc87a331a9b37febaf4e1b2b7c0782" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "95be679b46773a31ed8a2559f0e66d01" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a7b3ca8d71151e3ade0b5b83d38fa331" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "3e046d2c1cddd2aa44b3629b55c998ae" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a3d8879de12800cb5cd2f83ecf778a2a" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "503e67d7f5d93136ae93e681f224e1ad" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d16f49863ed92d94dcca4a1c8f8a4c8f" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "bd86aea4b4b7ce426d82475028f597c1" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "8a00e0c0d4e2f7efdfd595df91ab904e" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "7bb47a523d7b5dd44ffd81e0baef840c" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a8ffd77538af8e71cfdea709a374ffb9" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 26927104, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26894336 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26910720 + } + ], + "md5sum": "98ae2c2410ece77c4eecdd073a1da098" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "34c77418f2687456e5b44537fba7ab2b" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "206b28d565a97cc573a59020460607ee" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "2e337453cc2b308d1233834075d0a766" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "ff979b78edabafb401b4cd10598e1ade" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "32d5412c6c0d6de8c593027671dfe99b" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "87074e0f970804330290a9847aab3925" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "4eb190b1a8a2ec79d66e1580b28dd199" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "b3e48bbb52b6e9601e1a2fe2ae60777c" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19337216 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "92681d16ccc34e7d4c82e4f87ec75fe2" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "2ec25d667518ea683b04edf516437959" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "6a1032b44004d8a878825c56f078b750" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "51e6bcd968c011119719c57ad56d1c2f" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "333d8cd15dbafe0f00ead5ef000ec285" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f032cdeba28dcbadfaf30eae8c91370c" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "d438fdd6c2041e41708012c026ae7a0a" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "55af4662a681e730905814a7ae866767" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 22712320, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 3358720 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 7557120 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 10915840 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 10932224 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22679552 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22695936 + } + ], + "md5sum": "8f17e0afe9b4d542a64c360f679c8b91" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8d6d1d1505e60a432907b7b0bd2834a2" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "3b808af2e6205a622882dc681d35fcd6" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d98730f498b9bd6200bfe903aeeb91c2" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "cc3f84e9aa38eaae33f07999348ba88a" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "9d68e2bb14fe38605be1ca06b5fe8460" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "c6e7e608cc115420bbdd55870fb9b303" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "d6673af75ace5850ea4cd2d50e812d7f" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "5ecdce62b6193895f8521f90d0e1b492" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19320832 + } + ], + "md5sum": "461994694a1c492f6c29494fc44af03d" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "494c75ab559a525e4ee951a203ea6b10" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "00e295a6944ee7d5ed84ac8b3256f019" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "b17ef45bb9e58f95dcbaabe54e6a56b7" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9af75fefc0e169b8d65ad05515da2e51" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "8ad448f06b14485569d75fb43d35be65" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "b866d3629baa0cac8ee4b421075e8559" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "1905be82a4bcf2c715379d42bc3f6f62" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 22745088, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3375104 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3391488 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3407872 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15171584 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19369984 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22728704 + } + ], + "md5sum": "19cc89fa867aa1e40f7b82c6b7025a2c" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d2361f66dc39900d785342622e86aaea" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "1161815424eb1a5d418569b74e207c15" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4ee5ddf678ed5473ce4d325e7caf5ee1" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "89336d0236f7f68ee36dbb989ebbe6b1" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5d011b7f29e325f953f0cfaad062952b" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "11e39823e86dd5a45e66ed062d41d60f" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "55a60386a701fb9e3b3d7c1bdc4af8d9" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "b36f625f80a00425193babb5ae6351ec" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "2255c305893c24fb7f194dd8a8458e69" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "030f2d4e76b5811cfa0edf58d26b4c86" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "02acbc2f72c41b73e625642f6b0c2c66" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d35653626965caa7fd4ea488312e88d0" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "3272b0bdd640e8a6dce90697d8e8e402" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3803aa08914668f22804216d6e0626c9" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "4031e076f5befac4cad769f5a6d0ab96" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "f6c26b4b7ee7204649605e0bbb935d59" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "0709eb256cf360df41c781c574cbb481" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "829e6536ad321c34f04cee50abaa7dcb" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "593270897d80f56f7a102d08e7e80a92" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e3f0e0a3b93b1e55aac126c72b571fe0" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "32b2567860b78654790f2cd8ed8c9b69" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3b64f245685eeec6b5c98a4b9dab6fd7" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d31cf7562328bf054be0b4fa7ef14ae3" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "230b2a92635334b271d102cf5c4ba55a" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "c22fc9ef36f33564ed8a8592b03f3024" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "0fbe007352c16f6560d3352e4c5db679" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9360aabbfb108b401c01e2fd9f65ee13" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "957605c23cbe243f7006ee9c191c8e31" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "145aad128f93ee452650e119428b53e4" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "eaff1b54d0bf89894409d46cdcac96c7" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3dbfd2aa1d8cddf606e0cd822849fcc0" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "57d9b35db3e0a4fe126e888abb6d8ad2" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "cfe2143188bcb52cfe67fe614b4dea26" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "e156d3b660a48a30eef432a0f814525c" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "bb052b67e724de27f995a55153935d50" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c3711fd8eef84bdafc43e211fe4bd23f" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "dc5b14819f1866e0833a47329c42bbbc" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "55bd3330306b73ffa437b1e7ed7e5acb" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "910d02ad4bd35f13ad04dfe970116f0b" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5cc90024eec2c08871a832c5fab0f141" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "cdb0d65a702c5395f8be65d3228b8031" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "ce522cb9249279a8f7af2e52e2529298" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "7a91a43e0a908ba17a3cbb25cc6f7408" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "ae7ca3d81212fce9532e17fba66407fc" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "85a63b2b2d80eed883db21241595428f" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "01c30545379aa46902536d37815b7565" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "976fbaa02228704868689c5f499a2b99" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4923d17a19472a5c45567fed1faea958" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "c8fbdd5172c19f6e72ebe5746537d533" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5a004946b1a957578301b54da1b8bb9d" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "41b7771aabffb8e267878e0596610872" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "f8514da139425f288acdd987b21867a1" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5787cf798de8809937c06bcd241643d2" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "3fb1dd3688962e98e8797e856fcad890" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "8a6b1820d256dc53653b6f1b2901c2e9" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "e528155b4e64bfb630afbee2d76565b8" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "c936f30b48cab54b4c24ba6a17e77bc5" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "a17aac543ccda84f5e1402b55127c883" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c1307ddf9f5562343ea401252e050fa4" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a07227861a47329282497be0bd782943" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c347525b638a91220ec10a1255da17bb" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "7f834ee5f263e12fd9cf6cff0a8c7e8f" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "50549276555aebee091f7f3d01312c78" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7edb01889ff4c337dc9632cde348dd44" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4ee201db2047844f5dcb0224b3cfcbc6" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "e804dcb5709814ba0c90dca8972b362b" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "a8bc6d6575b62d828b8f02ee1345ad51" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "70e27dd0b3ef83ff317b6cbad1f47ca6" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "02c11261e04334ca471c21bd6e0cb2b9" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "84da63f6d7c0059583d7c1b406b524ae" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3bbe07e89e43dbc423fc1b8466df6f9e" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "3a9d58ca445bd696d597068881b9e93e" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "fb260cb64cd5e6e86928383872a4d62f" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "afba82d3423f1a1d807a3d0fccf5be59" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "cbd4b56024a1130ae2837b9a18310593" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c9e27d332f2dcc73659585911426b5d7" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "344ce6f22bb80c6a0c6547b9961a4de6" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "acbb7fc5c0e6a645628469e089aa0849" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "64728d17793592ea9a6455ee5a9b1cba" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "48b4026dfa37979020a6ff463fefbcbb" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "11201d707615a706bb3ccae4b88df535" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 19304448, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + } + ], + "md5sum": "799e527fd4fccd7da7e4c8e1cddf2f9c" + } + ] +} \ No newline at end of file