{ "metadata": { "ParamSize": 485, "ParamBytes": 18982125568.0, "BitsPerParam": 4.500270809152499 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "3e29d46ce24b0e7acf0fbf4e4172771b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1ebb75ad1dbd0b6f951f59cf89fc158f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "35089ac273126399e6fc3e45fe995d92" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 27705344, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384000, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384000 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 16400384 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27672576 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27688960 } ], "md5sum": "50a28b46fbb4eb4ae2cab67630a169e4" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "7ea2536677f31901ce9b3a780cb669e8" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2ad8e8d1c461c1b823cb70bf9db33462" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "766c69341595a182c0a9eee68a216e8a" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "65490a668d1961c69cc68e8bcc36ba4d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d4595fde9f3be6af29775169cf6773eb" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "feb8292e9ac13964c905029a2bbabb34" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "34d8b8c23cbd575821ca7e570f6a34d7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9976427ac11b51ac8b7cad1bd1c480f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "4e4401717473facc8e484664370e20c7" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6b5ac156afbcae8f5b01f988a79df342" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "9fdf40074def3c0d312a3b5775d2c55a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "e680492300354225b3711e28f0badb34" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6da183f89ec664fc91e26a130298bcf1" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4d72cc6e8e71ffdc6ef09f7cdd95c85e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "af63fa19ea15cd9d4942c197e2fa9654" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fec7452aa4343c552cb623cbf652ca1a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "e714710ebcbdd17a55820418ae388c8c" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8d0de02992d55019d5d0059aa101d932" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fdf90ed37b3807cbb6837e5f86816f18" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d67cce6444af02dad9ad5ed3ccaa9ea9" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45c560fd503d0de3c077c4b6e62f5c9f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c83e30dc5828f15087e91214fdf6e160" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d6e822fb3f039df83a6d22deef2973de" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "43b150a3529e2748e185ab1b79caf774" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c5aaa8e0390e1b593c9307143ca35c24" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "a1a78bd824f880ac72f8fa4f9f8feab2" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ee0a446b0e5ec10b75369580717a3ce" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "cfedc30c93c46c4444c26df78f90c04f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fda8fff9a89b82fb08751b4355402b09" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fe4243b6f23807a38ea0afc5ae23365e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "eb4c872be442f79dcf374bb39d284bdd" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d74b1e3f23f13df954b9d8ee55a0c6d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "12bf9914a859e2b2a047060d849ec858" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "db5712187deab30ba40ae743654df56c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8b7f36438cfb82a91e174f7c2d538eac" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "705fdf9be157bd86c8eb8ed1e7ad0761" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "aaebd5421c68c68c26be10a8ae99bccc" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dae46ffe9262726ab67108917e8c67c2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9cf46e0fb16aff997f976026cc862d30" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32931840, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384000, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384000 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 16400384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27672576 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 27688960 } ], "md5sum": "cacc0b304a4de69db88a3b74e704dd7d" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ec7ffbf21368bfae7c3f35232791c462" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fe9aa69341c8545336704e99ed434b0b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d1e6d418669416ecc1dfde2839390dad" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8167b496bbab704d6e2f3f76b423e9eb" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7ee566a672ee00fd78c13770d9fd1d0a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "04eaff01efa31152c480c1c94344c6d7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 24952832, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 4210688 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15482880 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15499264 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20742144 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24936448 } ], "md5sum": "30ec5459978f11d692d8edc8928fe93a" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "bf166d0a30fe5c2fd3642f093b77fe65" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d2a85dc204070711bdedcde92af12bdb" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe2695442ea902059ff1c48386f13d60" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b62afbcef4a64c26393faa24aa2eb5f0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "64070a8415c1d38a4c1335b2730f96d7" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "9440e6f435a6979aa3518531aed282d8" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "383ca1be8f92cabd7c4d6d331cabb26f" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e75916909d9553aac6e779ff777492d2" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "778d75ebc0eabfdd2078207fa4ab60f1" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a50f81e7d9a72deb5e6efbf8311c80cb" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3cf91de4ec57aebb692c1fa45b00a299" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "30af045dbcb4fa76163966c708e17fe6" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b8f4f302ed8482d8326db502600ad281" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "444f89db8ac32e6d52321e12287675e9" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7c96aa2ed9966e58f70cddb722f8c550" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ad88515ed5c79cc8d983da83455fc339" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "286caf2ed01c8fb594d20e0cb6d3185b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "82f41c94d15e6026bb71ce24437cbeb5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6537e581077da2d2faac4efa373b7b81" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "53e5de9e8c74899e65fc70dc4ab2d60a" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cd1645b0b82f45db219acaa716e66449" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "b1f61686afd302c391e839bc08f2f0c9" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "e819dbaf63584819ac88451cc757d597" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c5759559a5d890d37acbb989b7e28c7" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4dc12ffe38d0fae7ac730b36ab78a127" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8872a6b68bd245ef4b6607a3f4652cdc" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30162944 } ], "md5sum": "a9574e1298b22739cc3549b53dde1e8a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8e047c3afb32489267be752bb41b6667" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "472378f5a02ecace50802276cf1f1086" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "10aa58b0c408b3e6a276d97c36ff514f" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "020025f6f28a495f45fa088fa59a7a71" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "968a1f951286a7f910563a8b944fa6da" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "1ce9e30a4c6eb44a6b518be206afc620" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4dd7ae2a72b5af7ecdc39e1bbeb61ef6" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a8335c75e308ea176c711bb185930ed4" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "cac0c157888340b2b6a0c9221d7b3b5c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7674f29758d8f467525f055fe4a6af8a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0b096e50ff5f8a488be42ae9c23c4b8d" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "c0ff7ca1f75b56e8eb3a8d29268203c0" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4bff922dbddfd0eeb846842b3fbde602" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "709074a0cfa1064e76a6e2f15e2f305a" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "43b15eca90aafcba2f7bdf0b20c19a99" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "da561a99f63b3cc75a9b2846d2c54e13" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "56c4fb750e829e27b9bc42ee3bfe9fdf" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "540f7781d2ade6bdd5c5b1c194deb538" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 } ], "md5sum": "12e5127075fa0b358fb967558678417a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2df4876b4164969b365ce0f2d8d8a7a5" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1b210d6b20182aa60d1ece46d5a48607" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "511f95840acc612aaef3c80bb4ea90c3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "165ca2df3d080cb4ae3f3c1f7aa6286d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b71aa8717f80ef4f400e07f657bb97d5" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a0118914174efcad5dab2cc758d5fb2f" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "043ab3737d873f1180f6e2df8f713925" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "772501f1dc94bbf568cb2b2c0c933bc0" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "832bb19e1a0ac0596266151ccd3f4b54" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "4e77c1382b154e6534edacd2ea9c5c4e" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2ed9b3db11e3f265a9110e740ebc2b70" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3792def7d7ba88abb4b47fc531059f7f" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3844ed03eeaa0fe8789b60a7ce90e163" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5f57c2ce65e9394536d07a768756e359" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "b1f16df4a3cdb8e0c2e18ff3ffdbed17" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "13a18e98582a479ac2030d23339a4258" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c1f6aa960c4e8678d41b674492a0398e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "98684425511569dae970d75e5c2f5e0f" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2aae1897b5db866a663dfd123fe2a81c" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "34734e22696d75ce3c82f2e1f06b1f83" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fc68addd408afb5af7b1387f2682325e" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "9ae02b356a50dbe05c498ca89f5604f6" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d5e5f038df432123cfcdfe98000ae77e" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "453f0702b38f510b4364bffa87958631" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f56dda4a39077d2945669d991047a33d" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4614ec9de5b99b9a757cf5b658f8b7e9" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "03dec5a93e5d3f16ba187509e38c43ac" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "b4c738b39542fca04e59dff79842547b" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7dcfe5433011dedd1a35884cbb8f3e43" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cf5a104ebb07e85001969ed735f769d5" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "0458196202b35dc54119b07c97847e3d" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cca9b1098b265f8bb0f7b78e5a73e964" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "bce4485046adaaae5f430519d0faa84b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "77f94045d012e18449c85fc91cb1d0dc" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a0df38ba8e30250adb669923f1aa006e" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d97f920c5337ee8f4fc9ea084f68064e" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e6d6a0e98dab5c3da78cfb9f1bbd7dab" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5374775bc28be1b5113187abb3fb054b" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "ec820e4ea598557cf7a48f2015c0fc45" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "5b41cb80452b1bda54d380705d034dcc" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f9cd5bd3731b003ea1c04d12f6cc0eb3" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "29b4b899c737201bf31b0b162c21a345" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "841136dc41b20396c0151cdc08a6c4d9" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e45237d7a829eeb4ece4dd84cc4a3bcf" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "56ad6b6477efa3d557a9e53eec6fb17a" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "369037d83ebdb8f552340cc06f37d7d9" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2f07c1492e661f41bb0cf20e72f27068" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "d5c84415242e11a33041a99bb27ca177" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "12ec85162f46d656a86ae391c7362858" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f7649670ac9fa22c09883c04b1e6943e" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "abc1251973ecafd383048e9a7ef2b73c" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "24316ead71c49df051cfde0a24b3befc" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "159a29b20661bbcb2badca7e35fbe3d9" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5a5de55103db1ec2e6aaf279484bfc5b" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "1d9f2bad50aad9b5f95537d19a625b0a" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "56786f7ffc3b6161ef56625a1ad425a4" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5268e2d8f39e938967ad461485c35a4" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 } ], "md5sum": "e685b41c5d97ba0d421fe28f1fce7265" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4fc9d2514b38c80a091e01b79bbae759" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c15af893be7300cc29c8cda46bf78942" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "81a7594c9d917152b80a00c0f3add258" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "7b3925a2a80130634689ca7090225c77" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6ba908553afa9b94a0510f1d21384b64" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "73e98a37356be9072009ab221398dc7f" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "7e63348a3dcf3d4a59d05fc8a3c4fb84" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9bfd3b5d0b0b5600a57be2b91e6b8920" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ebf994365f7e6dee83a4f6ca93091410" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a445d6022476daa13c62324f30d0b75b" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1ddd55657072081d480458d4e45d6ff5" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca133cc6e2170570caa5fcee545d25eb" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0f1bfefaff569c38eefa97e4737760d1" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0fc95b6784cc6e274fe00ee3eba7aafa" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "1e766f418d94098aa7842e7d55c10ab7" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "f2262de3c9b3f2f2ed0d9904c10a5ada" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a6ca59c19d4e0807000e7268ea37f101" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ffbca034f86913975fb45dbc2a48b375" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85888e9655b2445aa242144b7b13c21d" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "415386e30ab37925f47e5b747613602d" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "e3543b8534322b9160f7f1bae2ac2927" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "252065824b28896455adcd97b5823d68" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3f52d130f8b94f9849a315631f85520a" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "28cfa0779cbd7ac7f91fded88b5d737e" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "06154242891e0480a396021f19807148" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ae6656af355b1ee734d309e6a80347c7" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "1031e761baaebd60d7ee247d11d7fd77" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7f0b1f52a33d22dd55b4efa68ad6cd19" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "20b43a5307bd3d80bdd9cbd0a386bea8" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "70436b22c2b26eef4294354dfe0f2541" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1f4590f1847f5b6c45b5a267e0f2446f" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "2451813a82d745fd70e8e6c62fda0ef5" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a0629837afab432067338de54c3204c0" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "58e3fb24910f38bb315fdd2c3cb4e3dd" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0bf60d32cf698405baed07378aaf774a" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c5e31511605cc44f4ed612f33f34c9ec" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "46dfd05617c6fd226109319d4afed07c" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "eb00f2b7813434bd71fac68675eb7be9" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1527f5c9ce95adea6bdf17ab4cad0b43" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b31b95cd21fae8b8b8deb3bc69665372" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "707d1f6aaf6e12499fd5466eaaf974fb" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30162944 } ], "md5sum": "075025b52cbd6017ad0f67bf56e2ff38" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d5e5d0449f2155eac410ff07a5084f36" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6fe3240a1edb82f5e4ce71c30c0d1a97" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c6bde864dca08dacc8fed57d900806a3" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c22c3363efd3b0372fad6a14d153effc" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "35fab030696904efed8f67bb88c8d36d" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5f07a62e066c2c135199a9c7a76507c7" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11288576 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 11304960 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22577152 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 22593536 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 27836416 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "b1274f99b9015e05629432be297b7145" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6886ca6ebab84b5a5b7f1dc5dd5d3425" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a4541a11dba0994698efceef84d23a27" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2c63e681c615e1a76fc9d219d5d873e6" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "699771ee3c0b6cdb9ce3831dc13a7aff" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fafe876877c42a7cb96bd0f9371c4213" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8dc2676c01ce9e75f2e69400fce38c0a" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4e1e3bf0b1052c506bfe2958bca78765" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a2d744304ea7b2ceb0da3899bdfdd288" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "bd34cab085a639e73fe2645ec5173714" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cc787d86dc2559715adae4b4ae3ae13b" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c2bf9dd650b5920a8773bd0f54cac88f" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "2321553747353fa3b0ac0a6f09469c3d" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "15e523da987debed681f969072628564" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "09634855620306a7a1917cbac953c651" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8be463cd8f177da8fe975623f7245dac" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2f09335ef4618517100a027881bf3969" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "835e1d251b5e8933f15f6cfd75710172" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "b4fe52a2f6a64750e6fbc56a8ac5981c" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5c05a994a6fbe52800fd317078388b2d" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "92c08e32792b03aa115bee5dc81e10a6" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f637b069258db632ea5b9ff75ac375c" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "45de86cdc50a426b16e4335c7c783e8b" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "9dcbd35f179809ebdd012c6085d18ff8" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4ad03a5f05b7bb16f526323ff9a5884c" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3634492be6d897bb5e5a1c201d6680c7" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "9cddb6db59159539545d6cdcdb6844b3" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d751642793bc9795e59664d6d438a8e7" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "c442fcf1a9d8943329a097f94093d65e" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a52d09b81da64cfb2c451502c7ad19de" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 31981568, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9437184 } ], "md5sum": "4ca2d40f1cbb8b4b0991e5f0f51d5a47" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "112401f20535932c3f99eede7033cbb4" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "baf2fa15e3e74721a8ebbd806c8b6ee6" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "491683fb911df5a711bd5b616966c7b9" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "3ea3128b8444217b0508d3c503d180c5" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0209a2adc1b4280bb22c3adce50d069e" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "79874ce969d340d16d13d127c58ba59a" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "1da05eeee0d9e41ef3d60eae8a2b0959" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b4de10a3236727de057b3d4178fcffb6" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c2292447eb53c216788c271fac0b88fa" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "f783b739dc77c70cbda5de1d8ffe9377" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a2660d70bbe50279e8e9cfea1797c4c2" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c340fe6592d7426a4f72f4b606b026a8" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "50ca47cc42174cae9268ab2017b6c1a6" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "221234820b84ca8cf0e849e71d7497b1" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "545cc59fb21258c72b343ccb3c073ed9" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "323fccc9cc0f56a8aa43a99ffb5bc5e8" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f7793ea955e1961cbfc3ba7a7a9a5df6" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "907c9b54eb49949d6ba8df2bc730bfc9" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "86543b816e256cbe106d28d39b516be7" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "bbdfb02262f8aa7e0ca4d7f27ae8d9cf" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "c841260de0c6e7e8769a2fd76b0e7352" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "691a8bd2895631838766cb489c90f194" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0c842f0cd96a807e4d253c062c2b3d0f" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "97f7a682b7348ad2197385cc1b7d6150" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ccd9855397103ef0ba7f75ae755680c" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "32266e1d8c42ca18c89db7bd0287cf5f" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "7b3dc421630f53e5700aa9d891199449" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a6a739179cc92ef42a73d34c3e96af9d" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e15057f3e7359fda9ebfee712947d6be" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "07844e69910aa1840e0713391e7db466" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "766ba95357f19e246ceac6772f9ba245" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "e58b0fb482d7b158aaec5ac3417f5157" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d4fbfd4e6aa3087878ea4954458351af" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5e7a3fade972ee897b79d7f0c1315570" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e0fb3527415085699be2cec42a76de62" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "48861aac881585c5c695ce7ee581fda1" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "aa9711cf076da81f3a309da0d2995a47" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "df08277e8450479088bf918d288d5c86" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "045ba7ecf462a9be68ce3b6892b82014" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c007b682e987116829dfd7ed4722f143" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 30162944, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 } ], "md5sum": "e911520af24a8d92158fd4bebee0357d" } ] }