{ "metadata": { "ParamSize": 805, "ParamBytes": 43115020288.0, "BitsPerParam": 4.836979883651508 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131137536, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131137536, "byteOffset": 0 } ], "md5sum": "92c54271d604513980fbba18a5d415e4" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8759e093f86e3d4f800da1347b8c28d0" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "754d412a7cf8322470fa9a824bb03582" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "aa79f69aa8bafdba5e3a25088af5e90e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "48ddc18c1645a6db6c7f199c29340bb0" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31105024, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16392192, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16392192 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 16408576 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 31088640 } ], "md5sum": "5b606c8a7605532ad84ac202b030d61a" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "77483f33536c7facb20fee780d9c9424" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a79919276b74aaf126a7a660768391a0" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "81b0ab8188abd912fc55925301332fbd" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2aba4501c4cfdc48b5c8706c2510eaec" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4cd92c2d8b86fcc716a6148d09c893a1" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 131137536, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131137536, "byteOffset": 0 } ], "md5sum": "2a3177679984267d9d9733ac25293707" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 18907136 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "416b688e5dcecf8d79284ee88e00449e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "aeef5d23d0d550b67007ad3dd0d825ff" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d564d3c19d4aeb8da0d0441ad04fd798" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c34427fd24feb272e3218c72848fce10" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0948a17e17b0a7ed52ed168d7a5e35bc" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31105024, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16392192, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16392192 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 16408576 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 31088640 } ], "md5sum": "01caa7d40b3909ff12f6c0446e9e7af3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4e59990dac77a9c24493b2b07bde9a71" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5517ff4f3fdef91dbddc9be9e422b552" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "62ef18450ec41313d539cb90a914e85f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "227e4f43ffbea597bddad08ac874c196" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "086de728ed491dde4fde2ad66470d031" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5a57a91151afcb8d3e348bd0aace6dea" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "3bd9cd137bd3020b17434b9fbfbd8280" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dda999c0b1178b124baaac852f75ddb3" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bb41b8dc6341070b07f212b3fbb1d885" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5bcd24adfcc1e901e7acf23ea38735ab" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "68e30c5bf1231e1aff9d68b939e63db0" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "38545d8809d301af4e24511da1da8c79" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b4b7432a109c3a8ff4e7b82fc84f9e67" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d6fe87f468d47dff6c8ae376fc717199" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5bd92b5ae97345867f9ebc5380530d1" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28327936 } ], "md5sum": "ea7d370e3e5aad232b9613e7a745e534" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "310241a5fbc5fd5483103a20d85c4400" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fbf24ce23d2a9eaf6932b090a7147859" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bad347da4768ab1e341660f959e52d36" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bddd9d97d503477f5807f02c303f703b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3e392dca52595126be3d276b5359344b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0c5203186e09dffd58d21eac2d2ee511" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "a5bdf6df35df8226effcab474785b6a3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "70dbe41f902c01e9fa1ef381701ce0f7" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "178de84061e791c22153caf00b21b59a" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "45107c1000bf3f247511e08dcd7ceba1" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "869d24353a33dd95ddb1bb3c786cb13d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8ab69d7867cb27b27f030ca44fe6abc6" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "1d26c5299bb1fd3cb1118a46e92cfa95" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bdf64bb31f06b6004c56c93294e633bd" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10ee09359e22516fa43197cbf0c4fbfe" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e6646cc63f0d02fa34804509bb55b30a" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d998e3a6256ddf08d82396217d791cb8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7c7d18dbefadab8cd1a62dfacd02916b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "39732abc2cfa285d6a060253278a5930" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "77a90a5b0d42bfd988ae79482ae0bc23" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8f58729e86c0ceb7d13ae51e5ef4a906" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e75885239c580f64e81df5a24eaa91c5" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9be378a39ba47987a82ddd32199020fc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "85a91ad7384b8c18dea8fe029fc7f12d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d3d363f16cc992a860fc1e8244a316f8" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "161df1547ae012e296f6cda51df9913e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "72404c2df25107d7232cdb013cb9737d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "2b56aca2045bb0d69052a51fd1cf54c7" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "280575da8899e1d96bad8d46b980c36f" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a9e86890565a20b2bd4113e62fbca167" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "50156e4e7ae30a2d484e1dfa061f2f16" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b71f4e97e8b041ac76c6b581d834d3d0" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "01cdf7a5d4876f418fec757368e618b4" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c973eb232a9940d531e8629f7ca82fb" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "58a87e74c9fc449418fe9fe7cc2fa73c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "32f7ce5c8b204b5747f56708f0f65a97" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "e37742c44ce9f2b0f446a7fde2e1076e" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4bdd07a6160a9eb91c77471842fb8cd0" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "daffabee68487ed23ae579fddfce4f5c" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f845a52039e3f91d25dd7c64a3dde7ef" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85f2c5165c9b6278ff6e008eae52a389" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "334714bc455fd27a4966c0647a8058cc" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ccb354a3d4237e55c309e174da2e7ac9" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d34227f12560cc1808fb97520e2ff2cc" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "301da518462509443087055f3cc94a1e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "817e4dc3fccbf7e1ea4b69bd03fd0cd4" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "580d795d4715d93e40cc688b4e57366e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4bbc1286e1b506f05470abcfa5f07672" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "5d5b94b328047caeaaf63ad2c154667f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3ee0dfc2ba955b3a290e334e90cabab1" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10eec93c1daa301ac25e9f9c46008e89" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "59950380146f3867b220c4201afbf993" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "123f1efbf936450a4f7019e8280aa146" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bccaaf2c91623207949379460ba9ccda" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "8754d15db9531162feedbb06624f3a37" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a86a8e55bc14dce4f04839dbf7b2a35c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1720f71d46276e8e8020bde725fef41c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "47ee2bf1cef17642a4adc0efa6f5bec2" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "22ce9d8a58d7682ec1123c09aa5bb9e6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a48f6d2325b05090f86db9f16db093c6" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a40795fad4f10de0570bffbece878fd0" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "ae027598142bf5efac89e68f6a9a0975" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ecedaa61c3c520235fdad1e7802455a4" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9404152164f43b5e344a401f69a25d92" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a46cce931e51acd59bf8785ccd176ebe" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "202d65704e0bff3e392aa5abb1767bf2" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "bb0b8796c31dc76fe185e902f6c709c1" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a4c565f4a55534df8016859862d2dcdb" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bcb9dfcf4bd69cd26a73d79e3bf6efba" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b6fa204a26b6f21ec213d27b7d7e894" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "625ff1a5312569697a038338c2f09f56" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3fba4ef90189665fdbeb882c575eb823" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4dad54fff1f1856e05bfcf7835e15cc7" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6b735a23364b8b5c8d0a6cf1b30d983c" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "168a3a4b7ce31bc687a9abf3e8526379" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "df0d4adb34be6fdc3ea9d07002e78aed" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "216871a0a43337d0f29d9f69324ab375" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4678e933c4ad96d9b32bd56019acf277" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4e69ff7dfdd1e5c8f97cf18aa59f4ddc" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "05c85139f7bec6570b8d4cc03dab3e5f" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0924eea52ee15a947b8c7a31f0515d52" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6576b08b73b0f34e7c0157471ea849f4" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f47b71d0a6547467ea6ac09fcfd9e85a" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "86caa14eef6065444721f83c13821ded" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "f0bb59bed185b1478ffc23bf75c47a7e" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "77e611b137554bd2da16bef16b5df97b" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "379d8c0c960ad95b868f0249fe2c5e14" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f738c37c0526f7a1a23717e7bcecb1a4" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7a163ec520272cf0639d5412317fec01" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8727220d42a8b80746845fbadbf62c64" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3e2e2f4ef0013a7e3a541a8d57fe76d8" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "480c50654aa6a94a49b0c994ccf94c08" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "de8dabfde00d2818f388b395230eb274" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ecee0db55a3ed4819a75b3bdcf2ba58e" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9b558b56f836c7d947eb80c8f04db1a1" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "203a07785f2915cd92c02c2fa9dba534" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9e6b89938a04c3239c6c25daa8f4438d" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fb585b1355a8f3be319de349414102ea" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2831d2fd90c22797a609a786acd320c2" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32555008, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28327936 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 28344320 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 32538624 } ], "md5sum": "e470b9f3a5afc6030cfbda46ec15b302" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8463c95a651de034144ef896b795103d" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4031f7667616860f69cfc302f5c8c523" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c9cce8fb3291deb6f549f93efc56ec03" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "373f7405040007591a531f7a04ac2e40" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a15bd572cd8000183d346259f2bc4618" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "c46fad2a175586db5fa28a43683610d1" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3ca800b31b31aaa582b6de149f91ee00" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7abe61b46e948414a513e86e14ec7084" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3689bdce5eea611f20782290dcb3c9b2" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fafc66ab56af21ff6cc4fb5a08cc79fb" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "58d3ddf3a7682e73cb155fb9ef3b883a" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "2eb5745013f0a6024920edc590cfa2b7" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7b6eb53155535ec5fb1ed96e2265233d" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e2e861e055a6a1506457b59984863e6a" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7be04d055469f487543e2469e1c602af" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a12041e23f4704f85ea857f8b0c54b83" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bf0984b60802016947e752f08a634b9d" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a64f0d2ba4702eede488dae652416f02" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d09fe929e57e17ad4ed41e493fb283ab" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b17e3944e9d8b535f928e98a81bb12c" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9a891b28741e3101eead68e9e0bbe0d4" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ddc092f0761c25b07a001ab195fc3c5" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe795c55a93d5850528e88ef87126287" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fb12557be1f90f9457c9e95a60e47391" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0f0b2c17935ab82dc03fcab7692f69a7" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d45991e0d4f097420871fc13e0b6b836" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "346b9a649a6957399ddc8bf5a5ec47c7" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "114abe768fbc2c7f4399777ba33d0c33" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "544fcd682ea8b027fa3f983daf62191a" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "c2f496995844364596f2fdd4db10ddda" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8fa58907b7bfafb8510ef5222f21a7a8" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9011af7f2e67a86db1e34e5fa955944e" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ef1e945c0e3321d0a2a93521640f3fc9" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6ac174a5d0ebb4d3264ccb3e12c81864" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "438eb42ba46f2efd8477e26bfb7f06e3" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "12321247de24a8c249c21a1696210e54" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7915826ed941c781edd26898a88357fd" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "497c328f71138cce8ba485afe292b589" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d1a565e423d844af704e0db1c4195575" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9834132eeffed7930ccbaf8789f51014" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "baf109929f47a31e0379997fbe52ca99" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b91b1383211dd0b1026fd9e4b86e7aa0" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3563b044b75f377aff30b09bec9ba9d2" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b870ef4d175d2c36810dcbfeb4f5c5f9" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "0d5a27955122e3c5965659a45ae83d0e" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2d3804b92dd538c42033dbf5a036c3ee" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "41e578a45867265ddb53902ad24cfeed" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "96fcf5d46a60e5f1a2c5a33543872ed9" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "270aad9a32935b467ecca866b6b064c0" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b4b3b5a02f9fd15cd75c02e6675161da" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6a73ccee7dafc019897dee259903c8b3" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0872b5e3500d5648a6f17f05db973a64" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6a8491127a616b7647ec03df01784976" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "2b1919d9e0e199fd81eb3c1977a86752" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "53fd2ba90f20a58a55c5d6cd1b7ab777" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bec6a7303d7c300a8baf8e3a73247dfc" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0c76db2a6ed990a25d8501ffee516dc0" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8a53d71d130d23eae6961c98d12b65ca" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3cc042389a1f616c2515956cdea5c413" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "95e4a35039864dd89e3a55539a114ef2" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "4dfdba02ea9ef9602daf7dcc95f898ed" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "90aeee2da8a13a7c44ec8bbd957ce0d5" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eeb094f3ab48479b1beffbe1b802bb40" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "64e99cfff16e8d42da999abaf66ea58e" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5fbb7bb67ec73e6e69990e2708f29775" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "bf613fc9488dbf7c61d6a4718ded18cc" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dfd49c53d832a025f7d095d219d98e3f" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2750a7e76386f7e28dfc694ed33cda50" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c199f97960521aac60532408c84de7ac" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "127a8203a670d4dd7a352ae4fdcc8a72" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c6ea19f464899a430f749c6a2d948ec9" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "28f1c2bf8e384bebc12c0a475fec74f0" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "164c6f700be14830fae83274c161f5fc" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c93b021b25c6afe1ff0277a278443f52" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2c107086ad9d3a4d3a43698778de154c" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bd94261d7a9f2935c4b065416d93a5dd" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4383b542e0f1195b22db88d820cd7802" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dbff62161c311162f135d12a411fbfef" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "e99f507569efa3675e91c40a4917df5b" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "33238786ac25ab32257032f4496e9f56" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b3d4d4d09fa1fafbd3c7844f532d6813" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4be9133a9aa6e77fb96ece9475a67ce5" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "99a06b1a7eb7b6bd979b326ee0c43550" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "df721507111fa35b2c95809ac74fde6b" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1bb18c6b819e56e038ffc82e9933dd8e" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f4ecb06e0750bd3df793a747a0ca1777" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "534f3b7b43932dc9973c2c3fd9d336da" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb83798ea0fb24a355b106dd941b2e34" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5d954415b5724c648b5780f0d56e784f" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "d232ce2e29173752d13985b495bb02ac" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "123bbee1f94d6d49e919a884078c31de" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "57d7aee64dc0872a716f0431fbb3949e" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d2c5f0fd67f17cb3ac276b51789ebcca" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3a4794aedc2c02b5d755841bf3504a6b" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fc82891bd4ecd088bb1c883807808600" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "fa7763c4c0ef76d36b19194f2841e70b" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4bde6d5f8e60741a9ca7447bbe66a265" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5b20f94c8f2e430081f04fa3438794e2" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1cc5ba24dceb4c357f041b92f4fa1496" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6431e947c7c2f24e60ceb2192eeeeb47" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "31b0e33e42386b3c1fe54b8650e6ba81" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "0ba5c846a25b7eff8a619aa9d00d0db8" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "af02fc8d5131f9fff7509c713051210f" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fc189f16ccdc874448250fa61870eda7" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b27c4f8424186c02cdd6a25b753724ef" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3287c26238361d8cbd602ef294d48331" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8a3e2ee6da3368993e4e1dc0c4ae2d05" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5d39b5b363c31e039eafb18cb53c414f" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6a2e98ba37772feaa3db5e1c27628fab" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "05e47b0746e404415468a047736a3cba" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "06b48320f1d34204b71cea8602278188" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8fadf6b6b95dfa8f8ac1eb40ecffb29b" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2015b188722ab0a46cba74c89006af1a" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "8139fc644c52027f8b3f756dd6ccefca" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c1477227b5e8a5e9838b6270cac37237" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bdbc4e165bacb3254141644bad524e37" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4a89910bb87861712334fc8f0f0c5bae" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2d6fa400c0342b1aa3519f11ebcfc0d4" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e7864291178b69276d2298f1d94a05f6" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "50dd504c0e45460d290546da0b299951" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "54b94c8baaec40a046f851e8724741d0" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9948c6f66c0db6dd38640aabd679266f" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7398c29ee8236a5abd0e61b826b9c74c" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2d1f27b669652459738888d7ecb83583" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "94ee5c0c31400ba9cebd0895026c83d5" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "de56880273e68404c22ec0d01a08bbcb" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2769b08e8f028c6ecb5adae5e7cd9fca" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eb6382da706999d4a569a21ec6150235" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "626ab1dcc20d843ed612734bf10f932d" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1791e443c068711e099c7a8d0a7d8737" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c5c20fc5a49c2aaa5d168a3925433f47" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "44641dfe98b03f9db3ba721f37bd6e10" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8e84217bb0cd07039e0b6f2930be26fb" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0bdb34ad585ea4de479725cd841b27c0" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "980b24d7b5994b09b9bb2cf086068fd1" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0a0cd6487fca619d00d3533c0eea3d3d" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "db0ea8524570520c779fb18787b2df36" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "858deb1116c96ffe7eea48a64955b487" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f9a35f307aaa71a89259e75c44b16bca" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d3c7cc9659ae453386742bb2879865ff" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cd637a530c98ee57f45b8d13473280d1" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "39acd4c507e0f49410955d9eb2776e68" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c80b23b45864423383bc9231a46be25b" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a59a34d3c1d7ea5eb80d7659b73297c5" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "c1f9010762b78ac5875790454426f418" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2db71ea7bdba35e7b4d71ee0ebdf0f84" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bcd3e6b1ba84f2afb4b05b795cc53c3d" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a44aa82164bf8bba723b9bf671ca5152" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "134090034c3ab1778c0cdb6ba16ac3a7" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "43b420b6df6b03ba38722cddf58bae67" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa4eaccd63b9c36b9a0669720f50bbb2" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "943ebfed1d384e8faac251391ba5fa53" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5da720dfa7316170c220289dc8986f68" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ccca57402f38295106801d9da38080e5" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6e051e878f60bfa25b64f7e7806508f2" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "b44f5afd9d9869b73ef478bd49785e9c" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d5a00fa31743e9453516e99d809ccc12" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5463cad40deb1a3bc4272c068cae8a63" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6e8ad405b142a07810df0e5afd49ec38" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bc4b1925b010bfd398ae6631de60b3b1" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c9eebac88c5741d769d7aaf143d93bdb" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ff9bad4c0879c31ae23b771aa462f09e" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "b2e9a5e00f2f04749b19a40866928e9b" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "384a942fa510fa87eb46cb2c1a5a9feb" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "42e49e3c6c2d708796d7e93351410a4f" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a258ac46648ac4a8a2856091068e870b" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7c5f337d3f652efb37ec2625738fa860" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a843a65c49480de3e785d977f53ab3dd" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3773fdfff0dd65c637f42ccb44845213" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "c464fba25c11e010699d932e74d8120e" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5dbf50f307310e4c15fdea8ca497a1f5" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8e0ae28fe2c257aa411aa5323b6166d4" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3e00aad770fe5324601524ddaca641cf" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "942de9fbf4a97dd68626eeea9fd2c864" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d6bd37da376531220a72661f59ccec66" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d687b5948dcac8a327c386e6e92a9f8e" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "149c678d7344a4cd7cff3b74dc485863" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5e71b4140fcfc7115a6ce6a27503b432" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b6917e4ff32c7bbc98e6e661f716dcd6" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4fcfc95e7a7f1c1d9bdd3eadd8ae55a1" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ba33446a9646aafee5df920927d27390" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "50b13a79ef6013848cdd74da1834c4df" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "060c4fe0dabc1109812cd89d60571fe0" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "7980e2e07825d09090ebf98d7b47a5ba" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "893e71e81ffe4ca86b90deca16650051" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "008e8fbdcc6e9a1202a096b990f2cab9" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "afd2cf142b6b6e7ce795e06544ee1bdd" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6058236e91fad4a74c32228e16ad06f1" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b5ad88d064b182d578735806308db86d" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "c62918828dc597e7c31e9ee8e6e4be58" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "08da91fe60a93d520322e530af795803" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "763536880fcd2eb2a59421d901e3c272" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6de3d43705ed07813cbd049ba1f087e6" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ac8571a21d91d3b69ebf8c875cb447b1" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f22146a491c91e8d6a42eb362e502b2c" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "df300a6b529db4361049e58d1d2d12d0" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "13b91fb960b4696fa4d2892f7e846609" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3242536356a3e59c174eafd88af2a8da" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "45c80be8c36d72dd6e1c2ad986295b4a" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f56613fef71cfb82ff9b7ee57736d9c1" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c1efbc4e565db39f387e61b79b8353dd" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "727c926c346e6c28ef89beab4f2167e3" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "18db97a34ac9e3cd1b6cf8a0fd4859f8" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "35c568574c078b2233eebaf664d0ba32" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "99402c0de86bbc20719470617e518d5c" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "26d58d9cec1ea3b687ca49671ce17a32" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4236c3d594f0d33d53dd3e7718d716d4" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "896e83f0c1ee2b4ec816bdc1116ac5d5" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "db66f0f512d609ea728b44c0dc5e1ade" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b452ec799f30d827f874f61a501d99b8" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "688a68bb8461985f781217e81586f502" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "faa9db47869f1522aa6fbc1022809d15" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "33e47f4ad8d96a1e64eb1b823d1f80b3" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "e3b260bf2e5564226b21dd8269be9c41" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f8c989eea2b08c15b6c4bfaa2110bbf2" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "48118caa4825834afbd442f5bcc1bcef" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3767ede67fa2265b132de10233460b0b" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f582ae231dd2632117a592b970e0a5ec" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "56ccb3a9d37db2087b19cb623be1187a" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "33f76b7b061613c37b66e11cc7c63ed0" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9a675af6408a01d486f7b2a2145928a7" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "af3750c7b6d12dbee028b06c8fa86121" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1cf10e214e1986c6f104c1315d0615e9" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c0d1cc64253cfd6b1a632c81b84782e4" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "69775485d771c679f3168faf069404a6" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9ba3ddd3c22a873d156ec6cc9d3c084a" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3c2f09567c9f21fe98753ffca0eb3841" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff1023b58cdc63e204ac1edf36bf4ea3" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "6e716bb1962764c7674e0330e08bc88d" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6d41c6a548d1ddbb343ef1ae8d48f104" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c8e1239b9968ac0432fe61f45e0aca22" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "39bc2b11beb8e6fd15d831ad4f73e272" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ebf28da16b78bf4e41c287c37ffe1b99" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7bf1265c17290a8f1e9969f151556961" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "97a240629273ae08d9e0230cfd99b35a" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3540b1669c45328abe6b7d93e9a91d03" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d3b9410ce86e5a46b1b4c1da46f1041f" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "bd12f7ff26448fdd79f62d91e22c6991" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "34eb4d791ef2c3d5faf15659c98bf5cd" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cc3e9964554785174641312cb6c2cdfb" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "44a61022bbcf58237e9919c824f6ac1a" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5788383cb49ffe519ea331ff593f6796" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ea7f4f052ba9c8720e03ef573d6ffe58" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d5ab789657ad278d899ed919bd2aba80" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "dd0233dfab538730e75a14ca2fb8d99f" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "89600a66752eb915942794c3514a41e1" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "13668ac76aba5cc49d35665558d65bbf" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "07ba651e857be44ca0f3780e3e6a21f7" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8f81b62c19c7b6a1d1ef34671dc3dc77" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "9011362e9465e823151819ad9cc22577" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a54273aae3a983dc63ef9a996b797bec" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1d04df3ae7648d3d5daacbf5182c0b8c" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "af6476a28dea11be9a541fd5aa1e9e9c" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8a015734a7aa04182fb1d04d8e62b6d8" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "00efead556b7eb3734fa732517493cd9" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "579f3cdab2a3297772047de3a8a719e1" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "84788ea8883a713e6e64f8db0ee57ccc" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "843acd399eb0aa16f3e701b046426047" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5b64925585bc302984d27ff6c2bdd782" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9f1497c8940739f65a9135e31b4eea2a" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ac064ff5b8590527a346d6dc7eda2424" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "32801e9546e2fc11090ee9aaf0900708" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "baf6a46ba318e16aae83c5cccdb2aab0" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fc078e333fc7d6221bc9b7d99c354f5d" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0483c787405f3a7a1bf7b63a1b7de1b8" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5797e6414391bf371e1449cfb56c3ea9" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1a61b14f4a715e4c0291d35f1c3c1bbf" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "eb6360d0280acd0881ec9f88a2eaa119" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5405bcbefc2ef35c9732d617e06b13fe" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fbfec8e5f705880d9aeba1bfdd7b1e95" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d75fe06a8b870733b4588b44cfcaf9c1" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c80e0f3a5c3da810310eea82ff301234" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bd41cc2e51976e4615fb384bb2fc176f" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "32e2b10fb44a79487c0c9c38cb184299" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "97b28a95632622232b7f622f7c58ef61" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e5cd47280ac8f590cff1fde2d31738ae" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "797c448b5e5e200aeec0473ddbdb3f7c" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "40693f8486e1a7b9632cf80e82bac214" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d62c3fb1536ee021c3bdbd6aaba5dc11" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "92f35c30d0dfa792b4bf2d9b96dcfa69" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aadba61ff8b3910ed9f9df8db0ee4536" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ad60f7840eca30f754972d8878f0a997" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "945c762db86a767e150a65b1229f3664" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "da5dc9127d9f44798035dbceae949a0f" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7bc2b122ddf0c78cf0708afcd4b61d73" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "0b9305fffbedaaf24911af19bdadd0b0" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "66f72ef03d1975f64d0cce44b7d40069" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7f75112ff91affca125445303acae6a2" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0979a6d0a426da80cc8335a8d551c82e" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c3e69d5b2401e5ac1ee50ed76e5eb4f0" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b62da5f313cade9bdf46f280f1944440" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "85822ab8fd191546692ff34f7f6b1ea9" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "97f4d78c319345b7c7b5af8dbce2fa57" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce296c09349e3b4aeaad845dbea575c0" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "9f2f037ddbbd0ffc39f386bec16d99d1" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5c85baf680f0adc0dd64e8ec2c02acea" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c15c9257f4f734c12a62b928624f8cfc" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "f64d81f2073f47e56fb7e4a71620df92" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e6efe49025eb43b796847016148af598" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e29aa67f478254f98847deadc0824f8c" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aaa39214da8e39235d9bd00d6767f219" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3162646b65fdcd5a6c0fe175e11c90ef" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "47b979d4f3ad5c0cf5dae4eda71c6ecc" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "eeecc1b92d814b89ea11fdbef92c203e" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8d325f8f53a9ddd20e4c2931298a491a" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "28fcd8dcf58337c246d6ff2f53d4d7c9" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ec5e8cb34a7dbef73e5ed4be97027605" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff95d2a13e190a1435bcacbb37d721bd" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bd8c99d8ad9b13ef28e2a08ea2f4142f" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3083ebdbe9ba24b5afc27b8030ae62dc" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d6bc21a8b57f909dadb7f8fe617ade26" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5111a59f5f457f0741a82518507f0ecf" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "3458d0b74bfd4a165e5ac7d549e6c49f" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "30342bc6a43799c32d8a4676cb061c50" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9796baf351dd1d6f04f00ca430a90064" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "7af43e6e40d7a078453deefd8fd8643f" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fa558c5e53ceb6dd8c4e952d2f378237" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8b76dcff3e574c54e8be0dbd0471bff8" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "04f94002f7f78d39dd9b8e4d053ce9af" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "91bf234a59c218dc07a0f27348be48b9" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "baa32d8b470e0d2db105c7f675d0cf27" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4c048e2854f48c1122b010c0632e5eec" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "469fcada64b9d751c04af53fcd05782d" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "be97d111316c1742df5d15776b2d7040" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c4cd0d2c7d9232777f5f9ea25712295" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d0c740771066a9b34f610ac2fce75966" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca97ad08d8da8ac73258ebbdbeddef88" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "db4355b857ac550e94052a2a201af939" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "e881cc9caaa68de1590797fd92b5be62" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5f4402c3885220559a7c7b36be0c7a3d" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25e426f642d33c1a074978d3f72b41dc" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "65bf543f50c6514bf5d7e7fac02bd5ed" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9cf6a5a28f754d9c8387dd159a318a48" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "9c09e12512d2b41fdc5b9d956f694bb2" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c1b847b2550900dd66a9d481663b4891" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "252267f457592baf91b86cb941597951" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "07703217a96a22abcf4e96d56e0f36a6" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "94ec967a7e32ee4eed6f9555d6a1069f" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0a4daa8446c6c65cc910e3d075f00569" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "29a6686a99d6ed4963be2e9e8a088ac1" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c1c1abc51e46977145d04025aa233c74" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1b4aa462f97c0c99de03f7cd4399fa4e" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "183c0ebf802a7e0222cd1b019a3b2b8a" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "113f727481456f56950a7e80f49e9f11" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b8b581138f501b1a6c5e9a1497e9effb" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "2888f90cb86d78a4efd77c9c07feb289" } ] }