diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,12543 @@ +{ + "metadata": { + "ParamSize": 805, + "ParamBytes": 43115020288.0, + "BitsPerParam": 4.836979883651508 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "92c54271d604513980fbba18a5d415e4" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8759e093f86e3d4f800da1347b8c28d0" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "754d412a7cf8322470fa9a824bb03582" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "aa79f69aa8bafdba5e3a25088af5e90e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "48ddc18c1645a6db6c7f199c29340bb0" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "5b606c8a7605532ad84ac202b030d61a" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "77483f33536c7facb20fee780d9c9424" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a79919276b74aaf126a7a660768391a0" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "81b0ab8188abd912fc55925301332fbd" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2aba4501c4cfdc48b5c8706c2510eaec" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4cd92c2d8b86fcc716a6148d09c893a1" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "2a3177679984267d9d9733ac25293707" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 18907136 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "416b688e5dcecf8d79284ee88e00449e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "aeef5d23d0d550b67007ad3dd0d825ff" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d564d3c19d4aeb8da0d0441ad04fd798" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c34427fd24feb272e3218c72848fce10" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0948a17e17b0a7ed52ed168d7a5e35bc" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "01caa7d40b3909ff12f6c0446e9e7af3" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4e59990dac77a9c24493b2b07bde9a71" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5517ff4f3fdef91dbddc9be9e422b552" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "62ef18450ec41313d539cb90a914e85f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "227e4f43ffbea597bddad08ac874c196" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "086de728ed491dde4fde2ad66470d031" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5a57a91151afcb8d3e348bd0aace6dea" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "3bd9cd137bd3020b17434b9fbfbd8280" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "dda999c0b1178b124baaac852f75ddb3" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bb41b8dc6341070b07f212b3fbb1d885" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5bcd24adfcc1e901e7acf23ea38735ab" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "68e30c5bf1231e1aff9d68b939e63db0" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "38545d8809d301af4e24511da1da8c79" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b4b7432a109c3a8ff4e7b82fc84f9e67" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d6fe87f468d47dff6c8ae376fc717199" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f5bd92b5ae97345867f9ebc5380530d1" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28344320, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 28327936 + } + ], + "md5sum": "ea7d370e3e5aad232b9613e7a745e534" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "310241a5fbc5fd5483103a20d85c4400" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fbf24ce23d2a9eaf6932b090a7147859" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bad347da4768ab1e341660f959e52d36" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bddd9d97d503477f5807f02c303f703b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3e392dca52595126be3d276b5359344b" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0c5203186e09dffd58d21eac2d2ee511" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "a5bdf6df35df8226effcab474785b6a3" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "70dbe41f902c01e9fa1ef381701ce0f7" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "178de84061e791c22153caf00b21b59a" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45107c1000bf3f247511e08dcd7ceba1" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "869d24353a33dd95ddb1bb3c786cb13d" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8ab69d7867cb27b27f030ca44fe6abc6" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "1d26c5299bb1fd3cb1118a46e92cfa95" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bdf64bb31f06b6004c56c93294e633bd" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "10ee09359e22516fa43197cbf0c4fbfe" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e6646cc63f0d02fa34804509bb55b30a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d998e3a6256ddf08d82396217d791cb8" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7c7d18dbefadab8cd1a62dfacd02916b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "39732abc2cfa285d6a060253278a5930" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "77a90a5b0d42bfd988ae79482ae0bc23" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8f58729e86c0ceb7d13ae51e5ef4a906" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e75885239c580f64e81df5a24eaa91c5" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9be378a39ba47987a82ddd32199020fc" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "85a91ad7384b8c18dea8fe029fc7f12d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d3d363f16cc992a860fc1e8244a316f8" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "161df1547ae012e296f6cda51df9913e" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "72404c2df25107d7232cdb013cb9737d" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "2b56aca2045bb0d69052a51fd1cf54c7" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "280575da8899e1d96bad8d46b980c36f" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a9e86890565a20b2bd4113e62fbca167" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "50156e4e7ae30a2d484e1dfa061f2f16" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b71f4e97e8b041ac76c6b581d834d3d0" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "01cdf7a5d4876f418fec757368e618b4" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5c973eb232a9940d531e8629f7ca82fb" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "58a87e74c9fc449418fe9fe7cc2fa73c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "32f7ce5c8b204b5747f56708f0f65a97" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "e37742c44ce9f2b0f446a7fde2e1076e" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4bdd07a6160a9eb91c77471842fb8cd0" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "daffabee68487ed23ae579fddfce4f5c" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f845a52039e3f91d25dd7c64a3dde7ef" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "85f2c5165c9b6278ff6e008eae52a389" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "334714bc455fd27a4966c0647a8058cc" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ccb354a3d4237e55c309e174da2e7ac9" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d34227f12560cc1808fb97520e2ff2cc" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "301da518462509443087055f3cc94a1e" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "817e4dc3fccbf7e1ea4b69bd03fd0cd4" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "580d795d4715d93e40cc688b4e57366e" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "4bbc1286e1b506f05470abcfa5f07672" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "5d5b94b328047caeaaf63ad2c154667f" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3ee0dfc2ba955b3a290e334e90cabab1" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "10eec93c1daa301ac25e9f9c46008e89" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "59950380146f3867b220c4201afbf993" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "123f1efbf936450a4f7019e8280aa146" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bccaaf2c91623207949379460ba9ccda" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "8754d15db9531162feedbb06624f3a37" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a86a8e55bc14dce4f04839dbf7b2a35c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1720f71d46276e8e8020bde725fef41c" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "47ee2bf1cef17642a4adc0efa6f5bec2" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "22ce9d8a58d7682ec1123c09aa5bb9e6" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a48f6d2325b05090f86db9f16db093c6" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a40795fad4f10de0570bffbece878fd0" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "ae027598142bf5efac89e68f6a9a0975" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ecedaa61c3c520235fdad1e7802455a4" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9404152164f43b5e344a401f69a25d92" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a46cce931e51acd59bf8785ccd176ebe" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "202d65704e0bff3e392aa5abb1767bf2" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "bb0b8796c31dc76fe185e902f6c709c1" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a4c565f4a55534df8016859862d2dcdb" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bcb9dfcf4bd69cd26a73d79e3bf6efba" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8b6fa204a26b6f21ec213d27b7d7e894" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "625ff1a5312569697a038338c2f09f56" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3fba4ef90189665fdbeb882c575eb823" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "4dad54fff1f1856e05bfcf7835e15cc7" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6b735a23364b8b5c8d0a6cf1b30d983c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "168a3a4b7ce31bc687a9abf3e8526379" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "df0d4adb34be6fdc3ea9d07002e78aed" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "216871a0a43337d0f29d9f69324ab375" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4678e933c4ad96d9b32bd56019acf277" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "4e69ff7dfdd1e5c8f97cf18aa59f4ddc" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "05c85139f7bec6570b8d4cc03dab3e5f" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0924eea52ee15a947b8c7a31f0515d52" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6576b08b73b0f34e7c0157471ea849f4" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f47b71d0a6547467ea6ac09fcfd9e85a" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "86caa14eef6065444721f83c13821ded" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "f0bb59bed185b1478ffc23bf75c47a7e" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "77e611b137554bd2da16bef16b5df97b" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "379d8c0c960ad95b868f0249fe2c5e14" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f738c37c0526f7a1a23717e7bcecb1a4" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7a163ec520272cf0639d5412317fec01" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8727220d42a8b80746845fbadbf62c64" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3e2e2f4ef0013a7e3a541a8d57fe76d8" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "480c50654aa6a94a49b0c994ccf94c08" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "de8dabfde00d2818f388b395230eb274" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ecee0db55a3ed4819a75b3bdcf2ba58e" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9b558b56f836c7d947eb80c8f04db1a1" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "203a07785f2915cd92c02c2fa9dba534" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9e6b89938a04c3239c6c25daa8f4438d" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fb585b1355a8f3be319de349414102ea" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2831d2fd90c22797a609a786acd320c2" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 32555008, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 28327936 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 28344320 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32538624 + } + ], + "md5sum": "e470b9f3a5afc6030cfbda46ec15b302" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8463c95a651de034144ef896b795103d" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4031f7667616860f69cfc302f5c8c523" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c9cce8fb3291deb6f549f93efc56ec03" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "373f7405040007591a531f7a04ac2e40" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a15bd572cd8000183d346259f2bc4618" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "c46fad2a175586db5fa28a43683610d1" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3ca800b31b31aaa582b6de149f91ee00" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7abe61b46e948414a513e86e14ec7084" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3689bdce5eea611f20782290dcb3c9b2" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fafc66ab56af21ff6cc4fb5a08cc79fb" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "58d3ddf3a7682e73cb155fb9ef3b883a" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "2eb5745013f0a6024920edc590cfa2b7" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7b6eb53155535ec5fb1ed96e2265233d" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e2e861e055a6a1506457b59984863e6a" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7be04d055469f487543e2469e1c602af" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a12041e23f4704f85ea857f8b0c54b83" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bf0984b60802016947e752f08a634b9d" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "a64f0d2ba4702eede488dae652416f02" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d09fe929e57e17ad4ed41e493fb283ab" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2b17e3944e9d8b535f928e98a81bb12c" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9a891b28741e3101eead68e9e0bbe0d4" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3ddc092f0761c25b07a001ab195fc3c5" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fe795c55a93d5850528e88ef87126287" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fb12557be1f90f9457c9e95a60e47391" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0f0b2c17935ab82dc03fcab7692f69a7" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d45991e0d4f097420871fc13e0b6b836" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "346b9a649a6957399ddc8bf5a5ec47c7" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "114abe768fbc2c7f4399777ba33d0c33" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "544fcd682ea8b027fa3f983daf62191a" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "c2f496995844364596f2fdd4db10ddda" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8fa58907b7bfafb8510ef5222f21a7a8" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9011af7f2e67a86db1e34e5fa955944e" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ef1e945c0e3321d0a2a93521640f3fc9" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6ac174a5d0ebb4d3264ccb3e12c81864" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "438eb42ba46f2efd8477e26bfb7f06e3" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "12321247de24a8c249c21a1696210e54" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7915826ed941c781edd26898a88357fd" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "497c328f71138cce8ba485afe292b589" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d1a565e423d844af704e0db1c4195575" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9834132eeffed7930ccbaf8789f51014" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "baf109929f47a31e0379997fbe52ca99" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b91b1383211dd0b1026fd9e4b86e7aa0" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3563b044b75f377aff30b09bec9ba9d2" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b870ef4d175d2c36810dcbfeb4f5c5f9" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "0d5a27955122e3c5965659a45ae83d0e" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2d3804b92dd538c42033dbf5a036c3ee" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "41e578a45867265ddb53902ad24cfeed" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "96fcf5d46a60e5f1a2c5a33543872ed9" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "270aad9a32935b467ecca866b6b064c0" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b4b3b5a02f9fd15cd75c02e6675161da" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6a73ccee7dafc019897dee259903c8b3" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0872b5e3500d5648a6f17f05db973a64" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6a8491127a616b7647ec03df01784976" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "2b1919d9e0e199fd81eb3c1977a86752" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "53fd2ba90f20a58a55c5d6cd1b7ab777" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bec6a7303d7c300a8baf8e3a73247dfc" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0c76db2a6ed990a25d8501ffee516dc0" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8a53d71d130d23eae6961c98d12b65ca" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3cc042389a1f616c2515956cdea5c413" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "95e4a35039864dd89e3a55539a114ef2" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "4dfdba02ea9ef9602daf7dcc95f898ed" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "90aeee2da8a13a7c44ec8bbd957ce0d5" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "eeb094f3ab48479b1beffbe1b802bb40" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "64e99cfff16e8d42da999abaf66ea58e" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5fbb7bb67ec73e6e69990e2708f29775" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "bf613fc9488dbf7c61d6a4718ded18cc" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "dfd49c53d832a025f7d095d219d98e3f" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2750a7e76386f7e28dfc694ed33cda50" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c199f97960521aac60532408c84de7ac" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "127a8203a670d4dd7a352ae4fdcc8a72" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c6ea19f464899a430f749c6a2d948ec9" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "28f1c2bf8e384bebc12c0a475fec74f0" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "164c6f700be14830fae83274c161f5fc" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c93b021b25c6afe1ff0277a278443f52" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2c107086ad9d3a4d3a43698778de154c" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bd94261d7a9f2935c4b065416d93a5dd" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4383b542e0f1195b22db88d820cd7802" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "dbff62161c311162f135d12a411fbfef" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "e99f507569efa3675e91c40a4917df5b" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "33238786ac25ab32257032f4496e9f56" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b3d4d4d09fa1fafbd3c7844f532d6813" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4be9133a9aa6e77fb96ece9475a67ce5" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "99a06b1a7eb7b6bd979b326ee0c43550" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "df721507111fa35b2c95809ac74fde6b" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1bb18c6b819e56e038ffc82e9933dd8e" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f4ecb06e0750bd3df793a747a0ca1777" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "534f3b7b43932dc9973c2c3fd9d336da" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cb83798ea0fb24a355b106dd941b2e34" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5d954415b5724c648b5780f0d56e784f" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "d232ce2e29173752d13985b495bb02ac" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "123bbee1f94d6d49e919a884078c31de" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "57d7aee64dc0872a716f0431fbb3949e" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d2c5f0fd67f17cb3ac276b51789ebcca" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3a4794aedc2c02b5d755841bf3504a6b" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fc82891bd4ecd088bb1c883807808600" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "fa7763c4c0ef76d36b19194f2841e70b" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4bde6d5f8e60741a9ca7447bbe66a265" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5b20f94c8f2e430081f04fa3438794e2" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1cc5ba24dceb4c357f041b92f4fa1496" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6431e947c7c2f24e60ceb2192eeeeb47" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "31b0e33e42386b3c1fe54b8650e6ba81" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0ba5c846a25b7eff8a619aa9d00d0db8" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "af02fc8d5131f9fff7509c713051210f" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fc189f16ccdc874448250fa61870eda7" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b27c4f8424186c02cdd6a25b753724ef" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3287c26238361d8cbd602ef294d48331" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8a3e2ee6da3368993e4e1dc0c4ae2d05" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5d39b5b363c31e039eafb18cb53c414f" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6a2e98ba37772feaa3db5e1c27628fab" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "05e47b0746e404415468a047736a3cba" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "06b48320f1d34204b71cea8602278188" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8fadf6b6b95dfa8f8ac1eb40ecffb29b" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2015b188722ab0a46cba74c89006af1a" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "8139fc644c52027f8b3f756dd6ccefca" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c1477227b5e8a5e9838b6270cac37237" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bdbc4e165bacb3254141644bad524e37" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4a89910bb87861712334fc8f0f0c5bae" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2d6fa400c0342b1aa3519f11ebcfc0d4" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e7864291178b69276d2298f1d94a05f6" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "50dd504c0e45460d290546da0b299951" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "54b94c8baaec40a046f851e8724741d0" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9948c6f66c0db6dd38640aabd679266f" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7398c29ee8236a5abd0e61b826b9c74c" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2d1f27b669652459738888d7ecb83583" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "94ee5c0c31400ba9cebd0895026c83d5" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "de56880273e68404c22ec0d01a08bbcb" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2769b08e8f028c6ecb5adae5e7cd9fca" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "eb6382da706999d4a569a21ec6150235" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "626ab1dcc20d843ed612734bf10f932d" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1791e443c068711e099c7a8d0a7d8737" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c5c20fc5a49c2aaa5d168a3925433f47" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "44641dfe98b03f9db3ba721f37bd6e10" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8e84217bb0cd07039e0b6f2930be26fb" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0bdb34ad585ea4de479725cd841b27c0" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "980b24d7b5994b09b9bb2cf086068fd1" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0a0cd6487fca619d00d3533c0eea3d3d" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "db0ea8524570520c779fb18787b2df36" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "858deb1116c96ffe7eea48a64955b487" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f9a35f307aaa71a89259e75c44b16bca" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d3c7cc9659ae453386742bb2879865ff" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cd637a530c98ee57f45b8d13473280d1" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "39acd4c507e0f49410955d9eb2776e68" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c80b23b45864423383bc9231a46be25b" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a59a34d3c1d7ea5eb80d7659b73297c5" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "c1f9010762b78ac5875790454426f418" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2db71ea7bdba35e7b4d71ee0ebdf0f84" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bcd3e6b1ba84f2afb4b05b795cc53c3d" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a44aa82164bf8bba723b9bf671ca5152" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "134090034c3ab1778c0cdb6ba16ac3a7" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "43b420b6df6b03ba38722cddf58bae67" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "aa4eaccd63b9c36b9a0669720f50bbb2" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "943ebfed1d384e8faac251391ba5fa53" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5da720dfa7316170c220289dc8986f68" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ccca57402f38295106801d9da38080e5" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6e051e878f60bfa25b64f7e7806508f2" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "b44f5afd9d9869b73ef478bd49785e9c" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d5a00fa31743e9453516e99d809ccc12" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5463cad40deb1a3bc4272c068cae8a63" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6e8ad405b142a07810df0e5afd49ec38" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bc4b1925b010bfd398ae6631de60b3b1" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c9eebac88c5741d769d7aaf143d93bdb" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ff9bad4c0879c31ae23b771aa462f09e" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "b2e9a5e00f2f04749b19a40866928e9b" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "384a942fa510fa87eb46cb2c1a5a9feb" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "42e49e3c6c2d708796d7e93351410a4f" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a258ac46648ac4a8a2856091068e870b" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7c5f337d3f652efb37ec2625738fa860" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a843a65c49480de3e785d977f53ab3dd" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3773fdfff0dd65c637f42ccb44845213" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "c464fba25c11e010699d932e74d8120e" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5dbf50f307310e4c15fdea8ca497a1f5" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8e0ae28fe2c257aa411aa5323b6166d4" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3e00aad770fe5324601524ddaca641cf" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "942de9fbf4a97dd68626eeea9fd2c864" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d6bd37da376531220a72661f59ccec66" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d687b5948dcac8a327c386e6e92a9f8e" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "149c678d7344a4cd7cff3b74dc485863" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5e71b4140fcfc7115a6ce6a27503b432" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b6917e4ff32c7bbc98e6e661f716dcd6" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4fcfc95e7a7f1c1d9bdd3eadd8ae55a1" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ba33446a9646aafee5df920927d27390" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "50b13a79ef6013848cdd74da1834c4df" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "060c4fe0dabc1109812cd89d60571fe0" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "7980e2e07825d09090ebf98d7b47a5ba" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "893e71e81ffe4ca86b90deca16650051" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "008e8fbdcc6e9a1202a096b990f2cab9" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "afd2cf142b6b6e7ce795e06544ee1bdd" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6058236e91fad4a74c32228e16ad06f1" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b5ad88d064b182d578735806308db86d" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "c62918828dc597e7c31e9ee8e6e4be58" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "08da91fe60a93d520322e530af795803" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "763536880fcd2eb2a59421d901e3c272" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6de3d43705ed07813cbd049ba1f087e6" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ac8571a21d91d3b69ebf8c875cb447b1" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f22146a491c91e8d6a42eb362e502b2c" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "df300a6b529db4361049e58d1d2d12d0" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "13b91fb960b4696fa4d2892f7e846609" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3242536356a3e59c174eafd88af2a8da" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45c80be8c36d72dd6e1c2ad986295b4a" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f56613fef71cfb82ff9b7ee57736d9c1" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c1efbc4e565db39f387e61b79b8353dd" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "727c926c346e6c28ef89beab4f2167e3" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "18db97a34ac9e3cd1b6cf8a0fd4859f8" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "35c568574c078b2233eebaf664d0ba32" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "99402c0de86bbc20719470617e518d5c" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "26d58d9cec1ea3b687ca49671ce17a32" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4236c3d594f0d33d53dd3e7718d716d4" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "896e83f0c1ee2b4ec816bdc1116ac5d5" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "db66f0f512d609ea728b44c0dc5e1ade" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b452ec799f30d827f874f61a501d99b8" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "688a68bb8461985f781217e81586f502" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "faa9db47869f1522aa6fbc1022809d15" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "33e47f4ad8d96a1e64eb1b823d1f80b3" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "e3b260bf2e5564226b21dd8269be9c41" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f8c989eea2b08c15b6c4bfaa2110bbf2" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "48118caa4825834afbd442f5bcc1bcef" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3767ede67fa2265b132de10233460b0b" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f582ae231dd2632117a592b970e0a5ec" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "56ccb3a9d37db2087b19cb623be1187a" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "33f76b7b061613c37b66e11cc7c63ed0" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9a675af6408a01d486f7b2a2145928a7" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "af3750c7b6d12dbee028b06c8fa86121" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1cf10e214e1986c6f104c1315d0615e9" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c0d1cc64253cfd6b1a632c81b84782e4" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "69775485d771c679f3168faf069404a6" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9ba3ddd3c22a873d156ec6cc9d3c084a" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3c2f09567c9f21fe98753ffca0eb3841" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ff1023b58cdc63e204ac1edf36bf4ea3" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "6e716bb1962764c7674e0330e08bc88d" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6d41c6a548d1ddbb343ef1ae8d48f104" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c8e1239b9968ac0432fe61f45e0aca22" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "39bc2b11beb8e6fd15d831ad4f73e272" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ebf28da16b78bf4e41c287c37ffe1b99" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7bf1265c17290a8f1e9969f151556961" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "97a240629273ae08d9e0230cfd99b35a" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3540b1669c45328abe6b7d93e9a91d03" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d3b9410ce86e5a46b1b4c1da46f1041f" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "bd12f7ff26448fdd79f62d91e22c6991" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "34eb4d791ef2c3d5faf15659c98bf5cd" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cc3e9964554785174641312cb6c2cdfb" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "44a61022bbcf58237e9919c824f6ac1a" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5788383cb49ffe519ea331ff593f6796" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ea7f4f052ba9c8720e03ef573d6ffe58" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d5ab789657ad278d899ed919bd2aba80" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "dd0233dfab538730e75a14ca2fb8d99f" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "89600a66752eb915942794c3514a41e1" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "13668ac76aba5cc49d35665558d65bbf" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "07ba651e857be44ca0f3780e3e6a21f7" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8f81b62c19c7b6a1d1ef34671dc3dc77" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "9011362e9465e823151819ad9cc22577" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a54273aae3a983dc63ef9a996b797bec" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1d04df3ae7648d3d5daacbf5182c0b8c" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "af6476a28dea11be9a541fd5aa1e9e9c" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8a015734a7aa04182fb1d04d8e62b6d8" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "00efead556b7eb3734fa732517493cd9" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "579f3cdab2a3297772047de3a8a719e1" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "84788ea8883a713e6e64f8db0ee57ccc" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "843acd399eb0aa16f3e701b046426047" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5b64925585bc302984d27ff6c2bdd782" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9f1497c8940739f65a9135e31b4eea2a" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ac064ff5b8590527a346d6dc7eda2424" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "32801e9546e2fc11090ee9aaf0900708" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "baf6a46ba318e16aae83c5cccdb2aab0" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fc078e333fc7d6221bc9b7d99c354f5d" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0483c787405f3a7a1bf7b63a1b7de1b8" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5797e6414391bf371e1449cfb56c3ea9" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1a61b14f4a715e4c0291d35f1c3c1bbf" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "eb6360d0280acd0881ec9f88a2eaa119" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "5405bcbefc2ef35c9732d617e06b13fe" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fbfec8e5f705880d9aeba1bfdd7b1e95" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d75fe06a8b870733b4588b44cfcaf9c1" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c80e0f3a5c3da810310eea82ff301234" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bd41cc2e51976e4615fb384bb2fc176f" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "32e2b10fb44a79487c0c9c38cb184299" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "97b28a95632622232b7f622f7c58ef61" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e5cd47280ac8f590cff1fde2d31738ae" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "797c448b5e5e200aeec0473ddbdb3f7c" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "40693f8486e1a7b9632cf80e82bac214" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d62c3fb1536ee021c3bdbd6aaba5dc11" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "92f35c30d0dfa792b4bf2d9b96dcfa69" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "aadba61ff8b3910ed9f9df8db0ee4536" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ad60f7840eca30f754972d8878f0a997" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "945c762db86a767e150a65b1229f3664" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "da5dc9127d9f44798035dbceae949a0f" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7bc2b122ddf0c78cf0708afcd4b61d73" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0b9305fffbedaaf24911af19bdadd0b0" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "66f72ef03d1975f64d0cce44b7d40069" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7f75112ff91affca125445303acae6a2" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0979a6d0a426da80cc8335a8d551c82e" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c3e69d5b2401e5ac1ee50ed76e5eb4f0" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b62da5f313cade9bdf46f280f1944440" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "85822ab8fd191546692ff34f7f6b1ea9" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "97f4d78c319345b7c7b5af8dbce2fa57" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ce296c09349e3b4aeaad845dbea575c0" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "9f2f037ddbbd0ffc39f386bec16d99d1" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5c85baf680f0adc0dd64e8ec2c02acea" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c15c9257f4f734c12a62b928624f8cfc" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "f64d81f2073f47e56fb7e4a71620df92" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e6efe49025eb43b796847016148af598" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e29aa67f478254f98847deadc0824f8c" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aaa39214da8e39235d9bd00d6767f219" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3162646b65fdcd5a6c0fe175e11c90ef" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "47b979d4f3ad5c0cf5dae4eda71c6ecc" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "eeecc1b92d814b89ea11fdbef92c203e" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8d325f8f53a9ddd20e4c2931298a491a" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "28fcd8dcf58337c246d6ff2f53d4d7c9" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ec5e8cb34a7dbef73e5ed4be97027605" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ff95d2a13e190a1435bcacbb37d721bd" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bd8c99d8ad9b13ef28e2a08ea2f4142f" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3083ebdbe9ba24b5afc27b8030ae62dc" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d6bc21a8b57f909dadb7f8fe617ade26" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5111a59f5f457f0741a82518507f0ecf" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "3458d0b74bfd4a165e5ac7d549e6c49f" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "30342bc6a43799c32d8a4676cb061c50" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9796baf351dd1d6f04f00ca430a90064" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "7af43e6e40d7a078453deefd8fd8643f" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fa558c5e53ceb6dd8c4e952d2f378237" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8b76dcff3e574c54e8be0dbd0471bff8" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "04f94002f7f78d39dd9b8e4d053ce9af" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "91bf234a59c218dc07a0f27348be48b9" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "baa32d8b470e0d2db105c7f675d0cf27" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "4c048e2854f48c1122b010c0632e5eec" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "469fcada64b9d751c04af53fcd05782d" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "be97d111316c1742df5d15776b2d7040" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5c4cd0d2c7d9232777f5f9ea25712295" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d0c740771066a9b34f610ac2fce75966" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ca97ad08d8da8ac73258ebbdbeddef88" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "db4355b857ac550e94052a2a201af939" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "e881cc9caaa68de1590797fd92b5be62" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5f4402c3885220559a7c7b36be0c7a3d" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "25e426f642d33c1a074978d3f72b41dc" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "65bf543f50c6514bf5d7e7fac02bd5ed" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9cf6a5a28f754d9c8387dd159a318a48" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "9c09e12512d2b41fdc5b9d956f694bb2" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c1b847b2550900dd66a9d481663b4891" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "252267f457592baf91b86cb941597951" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "07703217a96a22abcf4e96d56e0f36a6" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "94ec967a7e32ee4eed6f9555d6a1069f" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0a4daa8446c6c65cc910e3d075f00569" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "29a6686a99d6ed4963be2e9e8a088ac1" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c1c1abc51e46977145d04025aa233c74" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1b4aa462f97c0c99de03f7cd4399fa4e" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "183c0ebf802a7e0222cd1b019a3b2b8a" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "113f727481456f56950a7e80f49e9f11" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b8b581138f501b1a6c5e9a1497e9effb" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "2888f90cb86d78a4efd77c9c07feb289" + } + ] +} \ No newline at end of file