diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,12543 @@ +{ + "metadata": { + "ParamSize": 805, + "ParamBytes": 38801408000.0, + "BitsPerParam": 4.353045149919394 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "5bbfe46b704747413eca01d735d588b4" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c0e935a66a0610ca013b1c7a736d5bbb" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "761161c20c55b875d52b002e19e8636b" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "62fa83c6cafcccb1793b0139b89d961a" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a5e61d53d97c030d9e09baa59b50b0e2" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "c144941a588f33931ffe8a3f3d7286fb" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2aae978071f087760d5655eb3715a974" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4e7bf2314fd100bffed628587eb92d93" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4c026073a3df111b69e9092bf0400d14" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e725e00c9967526863a23e90e1a79203" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e4bfab7ec42301c575efc27b0cd92f05" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "fdd171685516255626037b5f955f37a1" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 18907136 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "5326b78c12f758c347a4a710935ef249" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a26f26704b7e14b5b7cdf415781b762e" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "5f7acf8439ff163331cd7823acc5fdb9" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bd5b307b2fb1ed5dbd76512994b11996" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "44a9af725c4a4c7f9230ef75f064955c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "2a0ae3e141417fbe9ab3924dd91f54c5" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8926697169a1dc5a112ecc63f254501c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a2a8d1b00122f18cbbaacca448f61010" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a47cb94f1ed5cc596e6d9589d8926400" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d2c8971f05cf114031d77ad3b324efb7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "92daeaf1f82ba867177224da6aa8bbcb" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1ed516fd0da7cebd92f42f4accdc382b" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "41252060d8765d5a88cf238c84743a68" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ad74d2fdf5979c5d429109ccf3c01ac8" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a3b3cd5910a3caa94802ecf2da94f062" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6074f812fc141a46515576e74796a21a" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "96d38bb68953462cade1d6f8fce6664e" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "93641327efd906e29b68ab5c3285e204" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3c73d8790d79c8de152f506703b2cf05" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a5d9a667c4988a076db9b2d670d63b0b" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0d4325789bb9dfd9b4d891ecd9bec159" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28344320, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28327936 + } + ], + "md5sum": "e54ad30865908a20c389602d9f7aa753" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "20abe3a4ceea2d27390df7de6bdf90d3" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c0ff36a9e2f8770f2ef14b8fcb691728" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "907bf07463d7978eab7a29b7f992b3b2" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d174febb13dedf9533e69362d0ec6036" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bfeb80651b5e805595f6a0cbb0922967" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0d94272c519e8356d9ce19a66ee656f6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "55f7d3798ab13017a58e827901a912ff" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "180a34af53b0eb5de9cb4f2322ab6484" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4a054e310565452a99b83cf849cf31b7" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6b027643a6ac69a63a1a64c982377148" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5be14b7cf4d9d8273593792b29a243e9" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d383a979abbd98d7e672eafe7ead3db6" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "809fd8f2f0eaea5ffce3f516de2660d3" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9b6bb5ecdb70d09e4c7f888e12575b89" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "967c6dde91fefce2202d585900e292df" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5b4517db9424bddf6561b19c691dbb81" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "347882bce52f1c8ab50ecf2668f3ffed" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "cccc2c957329f2505398477fd9e10b2f" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "96623428ba4d91181d0b0d5e15b3e508" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f7d06eef29db2b97c4fcb0061cf0a3b4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5015b9af525063eacf3290766930971c" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ab26a33768b81af0834e34e1aeb06dd1" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7dfa22adbda402b9e61a19ca23a50d20" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d778529b0555c333a3b640d9107e382c" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6ba61e8310d6f83fd5854ddc0cf66180" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f88a4e343e84756897ad854dc410934b" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6d6b845d4fb408b2d99b9614259dab2d" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "75808593dc2f2a6b8db77bf42e5b60a6" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fcad5310b2b0b00130d3ae54c0ab8f11" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "34b5d4d5c83e40050c21011d34a0056e" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "0c54234b8af3c3d3d0e16f5284674dec" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "35156044a4d8e67d22dfddd3f41e7d87" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f73478103da2c3165ca1e8d02b6597f8" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "42a992ff611458772b9a4d1962a17cb0" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aa3bde5a1d023f566a72ffa84d929e89" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b1566826a1a6e5d7163291762bf613b9" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0498dbdd1efc199b08cd650a851d36f0" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2baa44ea7a81d08ca5ccc62eb4957bd7" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d2b8eebce5eb5a075ac12a2e0174b2ac" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2fdf73da80abb28c128e30a30977f390" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a4764fbfae6f78813b9e871acb0a89d5" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "50bd9ad2c5dc75a23f2fb4a618d7fe2c" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "43dd3d3b3f2982e045ba2e11157063b7" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "65eb4c2726045de10e8cec7dc0ca100d" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6f3c6526da7766d422d65c11420d44fb" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "3ef788b928e395d956f1b1207f5ea3db" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ffd1bb65237bb7d6098de65b1f84804f" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "63d48e688896546396f33f67cb61564f" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "4e1bde92b98c48237939ac97b44323b5" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a1fe47045d7a4870eccf9c95cf19168c" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e26a436e9345b9043c6ad5222d6a239f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0cad8492383b6652280d793afc6c4224" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f45cb700e22e7dee391787baee792a81" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "541673fce87bf5d1818d0503a64c46f6" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "d4fb05956b9f42a350f732c9e8f0524f" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6ebd2fdaa47abccbc020a988160cbd9e" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ad5eaf36f8974ebd3356869394b733bf" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ed10ac54ff52d52b54895f964ea20cfc" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "690dafcb016930be9dd1d64876f88b5e" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "05e04175eecfad6d1929cb9e59107d6e" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "672db913188bc7defdb038ea5088c26e" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "890a2be8bc8396cb607ef7e728750ca1" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2458a1d0fc587fe73a4b5498fcc0243f" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "52bc8010ad239cde4cdb4a6b2ef619f7" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6256ac58947757a95638390c532b9d69" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9f3d2fe6a8ff40b5fe11e5c0aeeb98e9" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "d5b6425431e23d37a283fea8d4f5195d" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b07e0b5749813e916afb521e2e60ba54" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ad2e4744161b78e68d67d590cfcb5a52" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "448866359e00d7c074437a1af99f6906" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0bf8923c3a11e9d6a009f6a00be8cac1" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "43156a3d687a59218aa264012f45c814" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "cc2b63cef95f3f4394e51004a156b292" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7f64c225d2998d1cbf5bd04814f3fbb0" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "853d10ad7a5a3091091b89ee0747a1c6" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2526eb5c359781abc4c99fdcdc349abc" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dc7484ccd36f5ace8906be43b44fb353" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "adbba130b37c389f01bcbfa2dd276550" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bb9d9ba729a0dd311e65781d879da6af" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "944feb32c0f4e874b43dfc69b03de1a8" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2591f004ff2da35b6024496bc23621df" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "137fca2de5ad2453e06890bc3945451c" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eaa75985cb6d7342e2ffdc8bc308e65e" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "404ec2cca609b519129b9b815a6c4cb2" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "3af02f18f6c92f80ec84362f59a096cd" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "329988f702fc750b14dbaef7c3028abe" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "89c3f688747078d7cdcace318a2fa05d" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2d6376f2807f2405930b0281c501fcef" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "773a36a446ae2b06a01cc508f02fee59" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "75e1a840f8537cf702fe0d3852496882" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b5aebd50decd9366674534944ee99b39" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "186c7ce35d5040de4765f8158fbfb603" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b30d18c8403dbfb6d103902843b86fb9" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2e67566e7b7b0c03a53e177c452eba35" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0baf8422900b165d6bcc3183ac583f84" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "84a080802f9e0f66ba41526a99cc5d14" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1cce2c8947d81eabdf8225c150c6e8ff" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d235287dc24a54737a1cf39d92c777cd" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1b202732dc7f0ab6a2fabcf9f6185443" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 32555008, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28327936 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 28344320 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 32538624 + } + ], + "md5sum": "970801edbd679c7e6eca785afbb07583" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "be41a1700f5184c8c4b0bd5ad009f5bc" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "51bdc29932ad6b2db13a0dcca4cb717b" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7603d68c246070fe743863f9590420e4" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "237d4d0d96aa235a3a9d21bc9c5aa093" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6c1e564cc9b5e5f9416feaec26cb88a0" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "46ee071126202231ce6a0ae5b96b1788" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "993ae8f87a108fa59bf627be92f7fd64" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b849bc9c15026c23a76769edcaebfb31" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5e936fda41d18147256c1ae4cdbc0486" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bc450293a71e6754a0e4c09e4195b4a5" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "cd8e99151608c481cb40bb80467d7f56" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "cc1fb64e9afbe2409a47f11749d801b8" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6d608b1d0ae12eb0f1b86b4e6d37a6bd" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8cbaf6fbe538e621a17eeb20d8a63a37" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c8fa904c2fdcf2ec29f44bbf49019171" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "79b32c850e5a3786b4f305ae3a2de47d" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "eba98e6abb7da2e17d3bf9f50aeb2677" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "c7acd4c98368fa9d3f6e9ccc58a5ef3e" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "dc48e8f6aa009de28fcfef0b63de43bd" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "98d77b7571a7ac4db9a33807fcc345e5" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6ce195cc2d4fc51aa04f01054f8b8554" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "65997109e12f6b42ead259708696a738" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "40d4802ee5654a0ea3bc99ef68d3bd7c" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4443a0279db3b6075e2bad4e6c17136c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b4709c8379997b413973503b66477e43" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "00e987e2be8ba40040c0b60dfd1ecb23" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "ea0e05a7ec8f06873fe121920255e98a" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d5b6fb46b9c2b0a91f4131aec8b6b7c2" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c10e52c898ea64dbace84572d4c0c46e" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "7ec173f150fb4937ce82fb6964c77d4b" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cfc150ac50f9719093ddaf7bc6c8bd82" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "308d93d4469c5041c28d1109dab6bea3" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c7750c625b990e39f6624262743dfebb" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c8d5a919086f3605754303b712b5ea12" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "32868043a9a3aa715df0ddd58a2c5613" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "ba540d76cbf8d967d4666056e7930263" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f5c120ef7c6c294b58497b4add42fc18" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fd7368ddbcc6972aba9e6617fa5b73d9" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "de354de692159ee613422c436d9a0e51" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7a1c1f7c128b7f861f30178099cd3547" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4627766b9db0f6f43084d6a0c2ef0128" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f9ee6161acb4ac0048a16ac65f1dde4a" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2da1e0370b2ebc7db7aefb7100ae416e" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7f9b95b02109609e403d0ccfe84a8456" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "240594ef9885a26a93a60292935bdf2c" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8cec6bad7bd3adb24142ea86720d4d79" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "85dfb0f62f9346342d61c48a83ac767e" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "2ac1f68b4cc25f8dd8b3687bd7f97f52" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "72572f7737412a4b8e27bea6eef373c8" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9b1ae09eb876093a4ee0ac75a8a49937" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8d6da5046d189a4457835137a58a3a83" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "95f94c8b2da849a2b6c8c53a250419d9" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9e62a6d44472a85fe2f093400c41e8b8" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "780512d08b269602cf8d8a05862b5e8c" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "07e54ff91590a4d8f281d34cd0579e24" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b9754843bd2094b55c8989ed205d3679" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "eab288328ed1220cb5e1c9a3779b32a7" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f6d34c5692f4e987e66527859a5bbee5" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1fce3f630b1682a33edec129734fcb06" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "052fa4e82a9f9ee7a9f441a0dd41d630" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "c70b067ca4c618dc2ac615faa57b7551" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7463afa2f667d0ccfe0293d5eacd1e63" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9d21245a0ef7139c6d4e9c00936a2081" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b1808652a66ed8cc14841c8106971866" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6a03a1dc9288c05e81dffe6fb53442ce" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "46f2c8e45222ef30b2ed64849632c513" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9cacf69d696d115b4bb790d7fbf20013" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b8dc8ed4fcdbbd9e3b1d8a3dfdc07876" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6a05356c871c6e1fe28ca15ed79db242" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e8682a01b9b261ee0ca456355f264cb0" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ad608fd8985658e0b88df2ca933a1677" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "3ce1e4bf256b168476f7529d4558f4b9" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "82eb45fd1eb4fb1595a6d12671a27355" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "360ae2ed45a717f996b8b13053198d24" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "27116218c37ecc8a3623798d9e83e50c" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f3a89067eb30af1ff69945a35a8fcda5" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d30d123c4463dee454199d6f8a140def" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a016a2cace3bd812e599f6ef053056b8" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "098ca363499169713457189e39e836fd" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "88410b514c6e34f5f257e4facc066a2c" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "82726ec2ad6d0069f650fbd65bd915be" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "31646c467c586562f4fd361a3b4b907b" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "eaaa7527d09c24eebfe28c6f46d0818a" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "c0d475c6152ef2ce7e9e4221e2a7fe48" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "797d4f5d0384a8fd9f64b6c7194be379" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a6e3afa2d879bec014cdae21314c938a" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b9cef9abf522e3e05eadbd4c98552bfe" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "eea04ebb66fd9957f2fd0c1fd63568a2" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2cfdba0017bfd9c359ae87f232e5c7e1" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "3bb2f42192107ff47f84d0d9f2cba5fd" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b30012ad09734b744e5e637dcac41099" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a643f6c3c9d508a67af6d8ac99b9401e" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8e60a27a1608d8c8e2d5f4da8a0b2978" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "66c0c0ade4d80b1aa443621cd16b77d1" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bee272dbcb1d204419ff3d290abab779" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "cc4c24338e8c8fca0005eb77ed2fec72" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7f43d5ead1ef938cf710fc179eb01dc8" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4f2e5a1984a19243620caa4b40af77bc" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "69c867d3b2a65b6a7953d3e5fcb1b755" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fda7131f38aaec33bbe5ea39a901206f" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0cf6af2f7ef95c3bcba9d80c80672f71" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "9a98d448f5f9be4269d7a21019ca6f49" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a966b1c9d985a45b47346453dc0096e8" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3bb47d78fc740ce8292a76fbc45499a7" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d47dfd79eb6de597f29400eec3c94f50" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "43252ac831f86596347bea9a83a60bb3" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b17d228b31d3422630977847a0709442" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2f2b28fdaa219474d91de3470950ca87" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ed897e5572278e43026e9fb880ae4adc" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6dabd8036c0469476e30404b6c0e0dc8" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "73d16fa8bc20ee3d1c9567cf93bcd5ea" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b3a12a928567dfc064118aeeb4ccbf9d" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c71e51a1c2e282fae5822fb0d445fe27" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "8f7484cbf883c726a5a6aeed77acda46" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "00e0056781672f894ce2f9f9626c271d" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5e6934e07840018d5f233849a5877ca1" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cd2b49dcd54812463310ec22d0037828" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2769fc5e606bf1f2ca2784f27b9f8fc7" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f7eccbe26481488ef34d28ddfae25957" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "483a58eb312ce1a34f773229de6870d5" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c8982416a4f790b9641102edb5969d13" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e88b9985d450e2f613de3324929d789b" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2ac193cb329f6535adc616052a91d05a" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "66e86f2a54941e1ac13c8ca732dbc890" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "84fb6d9dd3ca0b3e25ca551b513ed775" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4ad7c593d7e26834414422c8d4db1e7e" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "12e1914fde8afe0a23d97c52acd181ba" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "29b1cf17b81167ef5600aa0db3eaae38" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "68c390291528edf87edeabc173f43e29" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "def38307aceae067c74628b16d7c473a" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3d41cfb2ea8be371030fe3686c7af32f" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "857c19a11e5e9b4e945f6dc71b51be62" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ec569c17f4cfd8677826970fbfaf1d94" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "14ae528a3049caf46ed3d4e9bfe9557c" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6495cc2c70a7b3e711d81380927bdd7c" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fc96832f40928993deb398ee9e58a5bf" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "40138c757de05b5fc11d357931b983f8" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f08efad43f5d3d189eff471d50df3705" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c5db4dfc25f7cf743d0253b7940ef9c5" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5919bbca5a489c2a6c5cf4d20622dc03" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e8a50bdc88892c4c618224657cf18faa" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "31f7052a678921f7c50410a3318af6b2" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "be071e620983d4d27d5ade85226e1f47" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4112152b21bd37705c84b2bc93456473" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "aa753491fce332fea541464f787453ce" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "69d484b138e4459306fa9b6f473ba52b" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "250dca1b30d44a132660635b567a73ab" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c28561cefaed0a07cda2e17c56083865" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a0b3190d0f4c4fac076215e40538ff95" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "99be305266eeeecaf3f675986ef22988" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "846453812ad9cec23ba47b9fa9714d8b" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8ce8d9c3c293953862d184da07186dd0" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "68c862afb5c29066b5d300cd63af33c8" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7647e8d6079f547fe8ed63b965c5f3c8" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "97297f696416d64f44b87be6f107c6ba" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f15771bd195fed09f3c11b8d35d87b02" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0acbfee1879dd15479fc2fdaddc7f54c" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "adbb64ab657a3bdd8538eb03a9f0b564" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3f2cadfd8ba9e7877d32dc29755e712b" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "23d917d78af2ef2fe1c3cb40d81f1f2d" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b314909d18b9953207d7dee6ccb6b756" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "4c94ccbb2a90b07e5533af4cce3d0cfc" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "bcd7d14027175631394d0b0946a3c2a0" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "17aea70f6d3c9576fbda7bb0c983a32b" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5bd74d921c274f04aac746363164ba6a" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "39781b64cebac2c314eecaa457761cf5" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8db7a92034af7df689408ef1fa11e9a6" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d7e1f7af6aed8a3b5a24fb41deb6378b" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2e7fecdaae4ccd7a6244035f528108ec" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "da60da9b67a7f52ddb54a96a143498eb" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "03bd7ac3ce0d4175b0353594b03f7904" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "13964a1d0d1baf76f5bb5d811707ae8a" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "df989adc116a9ac21133519f6020d404" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "de7e98007b2ab88ecf565b99cfc7dfc7" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "46cce831b08a621b72aaedf7af8f6f96" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1f96759a8e52b0b3f8d18531d817e4ee" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "09dd95c10d3683de76720b352d404ac3" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "86c24d3d7bdb52d57a9a8f3fbd97c21b" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1fcec46995f57f82183cf1ea9e3c1956" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "aee271ac47439fc48ea43bee52108310" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3111b4cc0252b55fb268047e04cd98e3" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ddfeefcd8980374d4a5ceeb5ac100f0e" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b82de9166c206856dcf1680aed5d9460" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "ab875bf5cd05fc3b058f1149210c5b23" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cdd225c4bae5030317b281993ad8fc63" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a6bcd98329581f4dfeb4f2a9182ddd41" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "186ae19925576f6ad73188186be27adb" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5e6a289abee17d29c54b78883ce144c3" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7253cb3f80e96c3c70045a7be62a88b3" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "93760b184a182f2ff2f941fc16f239cf" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "80e835dda4491b80441f259af3032a4b" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d45b1765df9557076bc057038dc47bb9" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b6c492a09809265326504acaac20ba51" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9094388c78b329aa31ab76e3497806b9" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d0e04c319310d03683c57cdf9554f19f" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "4751f54dc10a7e2b55025b8251488c89" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "45451f174a37cce0e8a7b36cf2b2a740" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9c677ea96af31426384ae87b3de2333e" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2820550142ac796d3ba942760369be06" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1dd3510f15b31adb2e7809ba271af2dc" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f8a1f2579d31ad63dc81885b04a5d25f" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "e8650f3ac9d1a8ad7b058f08e7552891" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bb2717987cb837a14e186395dbb3ad56" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "293b220270a5641189333e84bb895012" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6186c2e58ce138ee3deec80400641baa" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cd587b6370d6c7e12d1ce672c449c55b" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ea6263c93845f8bfc0664962c801b172" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "efacf6e933d4a48d413bec9f4709dc90" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "89d4122d8a37df90909f4b2d50e4c88e" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7f8391fc09046486a1eed4035985c287" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "f5c3cd3c83c6a76baf7120db9e2ce360" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "804d545b732d8ee453ee9137ef8afe95" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2bb20455bc31d68bb14f15c2c618273f" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "e8e69b854c4825186def999c8fa78c9d" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "023599df5ee3596fe31956f4724a8861" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "27dab307ee4a623c30ba0d2e27842cc8" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c0a342e1d968edc8a6a4ad896d5b343f" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0f34665b87d80699b43c2c4406da228e" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "96553de0659acb52ab4a10f11e0e2b79" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "94757716b3d0871a29312f5ce49c95b0" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7aff7f462a73893e85ed26575f819fae" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c06fd705e682d7dbfb2eed60bf52a310" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e3bfabc8cbe7c67951ee8f4416a755f4" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b7ad0b415743182e94218a5e884dfe03" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1a57d038ae9aaeaba58de2b05f1105d9" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "45c021cc39d651f6a95ac87a2fb57026" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aa8ab1992db3ccddcb11c3ab8f9c209f" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f164c873f6c5e23e404a2d55f5b29b9f" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "a49c86b85e840f7311354b32cf680350" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6160993a91235f161f0cdd8abef82516" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8e9d74c6b5bcd5d944ad12c0fc5cf4fa" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "0f67ee7f65231135ac832ed68c0a52a2" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "17d0e1f28c8b93e23bbb95e84d94ffb8" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "231efcaa79552dc2c2e5d4fba8bfd763" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6cf53269d9f8c261dee1dad91375b8cf" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d8287bd240243679ac3d76115ca0c8d6" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0efa7b0d03c612397024b4c9c5939a5e" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f8ef42c40745f7e7f5d6677f41ec7baf" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "062cb35a9fdb411a4c7fe1d1c8a88622" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2f71df79691445fa11e30ad4b43c52d2" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3af9ce31cd61a036197d07b51cf58920" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0bbb0e88dd886682686d5d185ff9a57d" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "33f9443555a4b1fb39b4bd05684e2d53" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fb34700a48dcda6a85afbd96e35ce73c" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "b2b5a308283d232ea961c06fe9549b25" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e38302280afff12f97f0f1a76ceeebdd" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e41a872754867a0df9609f51ad6809b6" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9e1613eb8150202c89c7b3cc65fb6f6b" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f827c2244d1d69f7e07c13c8e2e9d53c" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "ccc535e2322c159764c4a53aead876a9" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "54f802c6b47e113209eb482d6b804617" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b049981939d86bd785f99178e3a89b33" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ada72e856fc5c3a58e2d90431b5305c3" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dd2a9549384bfd9283f3707ed3096730" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3a9b7d1b46bd77c0162879f5da176edc" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "e5938cfde3c97f38a14c75a1ea48fcee" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6131b1959c53296b2c7c364a448c3ac4" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e1dccb6e2088dca5f507793461c7bec9" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c8746b91bd8c71bfdace54b512bc85db" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2c4e99d3b45c47ce6940cdd69f525d15" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "40efe1214159b002b96271901a752e87" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2624caaf1d08d7e49679fd50e0fd3fd6" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "c26616d487f24ba68e7690ab671c6431" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "dbaa6b4c85439f36c4bd1bc48efa8238" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f0ceed3c5d246995289a03678c9be829" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b7e760b3b440fc5580bc6e371fe29b35" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6f4228462b7f363fbbe186d57406fd97" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "f8933c1f3a141ab5d7577db80f5fe78f" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "56d5fd3b04967bc0f84b681fd7273dca" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f2610e8d9a525c35a8dcc9d362acfd47" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "120af2b0774c0cea4831bb18d05b31eb" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8cd3bb576c7d83439865856024ad70ec" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b812c2281b98ac6a0655810dbd81d71a" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "587161d4deda41429165d85a1c8c3db8" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6ee7b2ccb5b8c85bc2580398f9696521" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7258972f4b0d1c5f5679147d89902a11" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1ad6a494ff417ced6c1e2d1b4776b295" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "270010359e6cea91eb399fde5e2608d0" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2d8b40f553568fdcfef189bdc4d847a3" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0e8f26447f464753828b499bd39dc34c" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fdd228a4c516ae9e82d2f765fbb20eee" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fdbccf6b06ee5262040b50e01c6e326d" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c2c3f7bd8d23c61a5cf60b06068f67ab" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c090b0115b323966ef3f438fe1e20258" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3ed1c7e431fba29f5b11d554117f1556" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "d4b976b5ab29047cf4286a4958f83395" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a313ca3bc6b1a03502adb9fb432edd4b" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "df615fc9e5c082d41c433791dce51a2b" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2434df468f805461324ade065ec198f0" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1fb52f1a3dc03ca27f34033f772691d7" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "478abe3e940fc1052746e6112c613526" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0242a6e30f4ad9556a7768a6dbc8bb2a" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9febd550e145a373ef71d4a2bbd46574" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3a996cd781e0c7786854b408b9e6bf4c" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "b67b4faa43f07382f5c136599fae5ec6" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d81dca4be6171e1f42c3c4ac5812d251" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "da80a55674f43e1221f543ec60652915" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "bfc73ec95d87a56bfe97d5849c9526f3" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4ca517e9684358c0c8e524a718865fd5" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "23e55dd9fac1646ff00f1b1c6317e9e4" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2fd5371c31c030ce0bb20ec95f24a2ad" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fc6b2d47edc9459d79deb041f9ce4b57" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "864be1d369de6aa33a67409be60197f0" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "47b1b7b6b6c3655384e612e43599ec21" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0bc5076f90a192e2a107f66a7db65c39" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "28c30dc9cc083fc06343c993a8c8e1a5" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7b28f4bcc59133ed7c39a190bbba7c16" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1cc78ee5f76afb2e636cc558a0486038" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1a4df4fba9b01bc6cf48506aec022302" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cbb01083d97ed8bbb8dfa7d75f416ede" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d2180f47fbb2fc0189800ab2c82b9bbf" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1dc60f39564e441dd355f1c29b35b23d" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "8b779bb0afb96aec285576a70a4f3548" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "605311160bed62ccdb63b2384be9d73d" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e8347d7b76e28ccfa6ca4b78e44bbb00" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "3fb6212f8e398c7541795c0b274b097c" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "80ed07c4e95e5353027a8ec2c80cfc1f" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "42dd1e38c76b20bc15efaa9590001196" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b4878aca263afe6ea1bd66f82e472006" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f6d01db0cdf5b3348be450b399885a19" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "11ab935a4aecb4fb2341746ec187aaa5" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "89dd6fd18e1224b3efcb51d87dc31218" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "06c8673b3c1cac89583cfe3f5e257da2" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a65063e88c5578c62e3298d868e08b25" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d32b00664e84f665b4e97c2184b8714f" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0bd95b60297a49e48e4520aec5e14b05" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "766a09f77204bf14116b8777a7ff692c" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b652ba7ba4b4a62af2fc0e7934d02ad1" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "ed92f9fa23ff71a28fc5f20e7514aacd" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9a4ea3b8ca43147132b5316fbfac26f8" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8e7b8fd524e35b9be157a2de32b6e068" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5d9fd24b73ab949960735afb46532f11" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "feedebf9e1cf473c98a2d167eceb7755" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "7023b6742793f33f8c7ba62a22432df8" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2bf93919b5b4903c010ae936851b1363" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3a2ff8c5056f78d5567f5a6bc5f6d45c" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "efdbac6f49ad8103f6a5b75f9141d7c8" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "41393df80c3fb4a35e960b9bb8e59191" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d23172189fec07268f218574466d7879" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "182655ba935ab610b9dff88365088682" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a2aba98f68d19aba66ce5a2ee4ebc3f3" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "59a1000deeaf65db678bfcc18938ad83" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1b6e693b44ce411e079331d5b3b9a83f" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aa677ce2d68b526b905a6cc78fd13024" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "868c33911c6b2eee163af5b0d428d6c0" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "7ffa624750ede39dd878c9ff6d735a3f" + } + ] +} \ No newline at end of file