{ "metadata": { "ParamSize": 485, "ParamBytes": 21092663296.0, "BitsPerParam": 5.000635812792825 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "f83ee6c5a5bca19120ee1cb0c42a66f2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3fe51b7b4d5b80ed4f8f21e75119a5ac" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "9e19b1cc6922322231b7f3141d4548be" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 27705344, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384000, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384000 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 16400384 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27672576 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27688960 } ], "md5sum": "42cfbad5cab1e4374acd4ad00272f96c" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8b1fc149bcbde4fcd9ccd1f67adeefe3" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5802cfe91a70f2102381f7705303f287" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "732d0c9ffeeac63c45876a64c12c43d0" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "316ada3623875ae784dd5f1da5719f53" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "7a8b24c2bab286a41d64f13ce4d9d110" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "25673e75796c4963e09bf44ed2194e4f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39fd6de353ba8baa543e03e227a01e3b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8fa773aeef8d324a2e4bf58866c39a4e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "4c8c56d70c0c56395cfb1d498bb0f54d" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "62bf77bfb761755fdee0206ffdef0e1b" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4fca63935fc6e4fd9ebee10752a1ae52" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d37ab2388b67d64c3b12f8f36e45292c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b66738a375df0d4a1e01947682d28174" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a38af063396a2d4fd140c4603c2e26c1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3a10019614ba6245d2e3449e63838475" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "bdf7d642408b430bb7810e3276d4795c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "bea67d0348d3a038d69de693b0a6d7fd" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a9861a06d6726ae82ad7264075affab5" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "33b4dd5eb4a9ec84b89284a3ce4a998d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4a165d2733bdda5bd8fd5f3a7f06fc3e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8c372975165dd7872f4bbefb78536f75" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0a64f902198f888242b017a8c0bb7efc" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6931f00be3c6abc70a323bcb8fa9b9f4" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "beb6e858367fd031855f83e701c2d25d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3d9ac322624e4bdc407a64c09b5e0346" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "bbba912af842b75c3345fa033afa4c35" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "287e3283ead59cb14e936d7c660b2f64" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8963fd30aeaeb06745fd5fc54988460d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "ef71f494d1242115a3d262f888178542" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "69fb17667dec9e7c8a4f3b073beab32c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "859fd4ddc96184561ee4ecc1af51b070" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "60987ea2a898c49c4f15bb5791febd7a" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "60a7e5040bea8db3f473e0585b546b65" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "9871082fff630f357ffadbdc971f782e" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0855d18551d5783218b6cc47dd8fba12" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6449d295424e50dbbfd10c69bc10d8ff" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6a1ad3e36f669333bd5c2899cec64a54" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "573729e5baef0153c4878aa111a7b207" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ab30c26970e15a7d447d11dc839f2902" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32931840, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384000, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384000 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 16400384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27672576 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 27688960 } ], "md5sum": "9a9cad2079b8414bf9864b41c2366ec3" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fc18bdbd2893f4ada47523b902782e6a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "746beeba2d25ddc1b1236a6cfdf4c138" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2d940b1c7d28aeb4f83c57dc89749598" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "db186dd87c082f4bd2333c4ad6b55278" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f18075efcbc4c91a85a9e5ff3145b45" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b1ed935a5656a63e4d535d6833756744" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 24952832, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 4210688 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15482880 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15499264 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20742144 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24936448 } ], "md5sum": "ee681c69f3e55ed9033b4f541e92bdf5" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "4c49934074069c62364ca1628686d570" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "941770f39527a7a7aecbde52a23606b3" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a0ef492ad1e3e7ed9f66135ece5a58a4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45e8c76f038aef2bdd22f27d4e97c43a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "68315cb822e9f00587a5a8563c8dd37e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d31543a978b804b37d0b77db15b4f1af" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1d7342b30a0179f7aed7cc76c32ee8d6" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "430ce7ee38d090514aa40ff2e49bc7f5" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "b210353a82a55614fc818e9a633da90b" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "62912e1242665322412605a7bfe7e669" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "dd81f234b52f198ca9b7c754520242b8" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "4b704d0a572300deffc186766745cf91" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "8afd540ae32c312c402914cd33248791" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a1321e30806a952dc3881d3e5598a4e4" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6853f5a0eb521f2571c43b8fd0f6c944" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a2fdccf448237ed1cdfb1a8920c14583" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "af2817bc3d52f9d40ece2c421f45c953" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "bfda614dae05eaa052475f5ab0e14109" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "23452a3e5e6fb0712c5e8c3edd6fda57" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dbe382ce99266a39c6b8b2af79d587d9" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff459cea45630c94aa5c1506877db29c" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "ed8c2231d12ef847644b0195be018154" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "34e01c33baaeae29e20aad7c33c6464d" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8f908e7a0f40dac3d255e17809251029" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2b9296f4f2fe78a36e8dee04768c6f36" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "366d54f71e8d049152bc1d99ecf96410" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30162944 } ], "md5sum": "e23f3eb63d3439f1e6b8b29a8028339a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "eccd87b19fe85556274f0fe0c2609b24" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "deae3faf7ec625e197948c456dfad31b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ad270da7641ad7f496cfc516990e3b85" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "739c5afdcaf68ab96628d456185809bd" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d144790da7d3b02621f2e5c85cc89be8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "923339ba742238272fe85c90a8f17ce0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "9d0d8cdf1285e48b48785d61658dce47" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "71f217a032c93567d01d7f8ba9760bf2" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "9e6d3fe7429c3a037a69ec32b5ad220d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "198633f5fe847caea1a2c4c87193ad2d" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "eaa9666e16e3d56d3216afff352bc193" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "0949a4bcd3b1350287fc941c4d51f668" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "cb60d5d83d94529815631790d5abf2d5" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e080d49b67f2027a369476b9fa9cd32d" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "de5b102f99c627b1a31c3b7b14acb335" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d5521abd6af8eb0e00587e2a5f4babce" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a9412f4784f07abe82b0bf70a1540fed" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d2f88166114d311a7354c831a53c2037" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 } ], "md5sum": "dfeaf38d3a5d9d82ed8c2f3a42aef880" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "90fc39ccfa65021d4863b2e109aaeec9" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8ec891bfc629997864d81e825f81157e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2be5f7e7debdaf2122e23d9d385ad17e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "dacf0c014db7b8bb1d89fa962317743c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6294e58ad5038bb7f20ab9802d4461a0" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4b3c7951bccb33f14b4a92270eb4140b" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "ad93480e6860241dce64419922393f20" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6066ff03dbe7b1c104ef4a5de8202335" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c94c0f9050a75e06abf5c675604392a0" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "abf3e3a13fee14714bfd70973d323913" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f3a5896362f19c9e0df9d8c20481cf2a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c0fc09ac9ff5174eb104ffefe18ee916" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e03fe1fc55d6d26fbc2f8129b0f30ba9" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e5886fd16238e3c3d94b9accc9315198" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "41bd2875987d94ec71ed64edf851f2af" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "9da1b823f28532c06c34a3a094f7bfe0" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "34d305b423cfc2042c84d993466d753f" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "792a3595fdc8a7da196545a76389807b" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "916724d4392b5e09ececb350ccf4ac57" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "35a64a43d94951601fbde2f5b55ff8b9" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "989dcd8f1970d67d0615bdf0003abd4c" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "3adbca535c0771531ad63d032c369edb" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6a7a77c20a9438bed5ba16d93ba55f9f" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c3eca17389d3bf8adbbb7d6c7b2ce7a5" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "88841c018f9dee2f57d08a0e4d3e0a95" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6f5330ffb926d8656f37d65be4059b14" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "6a06e6d5fb69030ce8367b95d4328f3d" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "0ebe708ee2b2c143bf6996fcfa72b045" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d5b18c9dec27597f38aa895b375b1e7e" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b06abb823a3e032d5373a31d217e63d3" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "e736d6af94d7413c9ab6d0fab6f090ea" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56a735ef80089f147104d8afb361f154" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "941cf9f91c610ba600f10ff83accbc98" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "50d3224855c9dbcd7c2df430665906bb" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "420957d755c5f2ea6dd0793633d8f697" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b1f4b7f237cb04c73900abceb401ea90" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "42ced73bc336098ecfbbcc4ff3d26df7" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1d7cfa7c69d5a93f9c8923072caf1a6c" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "5a355f7a72bebfaac1615f5d4bdd30a3" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "544806b9d3052518eb3c40b9c703470f" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ff415f5b830c4899eff3cbf4c6f70d5e" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c3f7c8de425da4aa98ca61a6c440cc89" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6064f62e935a7118af504dc64143f7f1" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "24dab5a3ae204dc22a07a69da6dd40ac" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a890125843cdefa9d80d32eb24337b35" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "33cfb084508a76ac34164fa126e27ca8" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b7479c49bc2eff687f30d912b7585c9" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "c81f674ee8a1653f8146fb8ff62dbf27" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d256aad0ad4f5de1584e2878e14a455f" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3bfffa8be4b2c1a987fd2d1a9652c1de" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fc7a28d3c46e0e194266147ab4823a10" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39ad4c107d11ea8ed7747335ffb251df" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "80ef9f3db109b7d5c45740f6f8635fb5" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "795cea5d13befea0132dddae3d6d382b" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8f8cde34a41eb8514a783bf6f6686b2a" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a7871410c203884f982e309e076c06aa" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "626932862e634381bef74453cd7e44e4" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 } ], "md5sum": "c6fc14442e790be98308ae675ccc26be" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d106e0c367059e5402e0f14338406b25" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "46a50a4fafb77d55beed10aca8f053a4" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5a55489a4530521c770bd061b618b04d" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fa03d197989b0b8c7116e02987456a7d" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c4f74e6878c2e6b4544f258055471eec" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "44f6d95971523d02b22f89050ac08d1e" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "a0c2224cf571e28cc614d7918b66b4ca" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5702ed0d24ea4ecd4953233787a567bd" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "7f63e7afe36ddd30482679b8774df70f" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "3a3d8123c9cf8fb50e6f8168b7208af3" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "9972a0d0dbe2aaf3cb20602aca27e2f8" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "502fa3f2946c4107b5a2b39e42fe3985" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8b5c3d0c05efaf91ad7f97177427305d" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "dd220de02e05140025c60565c22eba1c" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "d18f6b186e591a3506fce18aedad6a78" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "831004e927fc4346e0dbc830aa8b8daa" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f0b6c23d7f76887237b4ad78b73013b1" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4f58775340d398d314bf57a2243e0c55" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "74192dc556b9de08a9365152b253d33d" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e693442fe86b86c3dee255d86ce48fbe" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8e8416e0d5b9118bbf338ef81f18aee7" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7604d1537268d1b40089fa399b04ce76" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9517e4fec618f8f6006fca6c0940c4a0" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "df92a4ab2ef66b911b4a4d940bfdcb66" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "871b78af71daeed73eb4e5e0e8adbaad" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2cf508e872a64a7385a4378b7d5c7972" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "ad7f134777a16910b1f421627be51668" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c4145a607787b47c457a0d3f826cf657" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e164a8b814751b7b1bf65801411d8c75" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2c3d3c791b277b9d01eb23eb95efcc7b" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c0282e1e695ac29b3a434291098f5651" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "6144e60ab2114e9473c980df2e3f3c91" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "20e04775fa97fe18d0f2a7b439176251" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5d4ad291b02fa6557e063c3695e06e59" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "73ae1ff6af685ae88f70867b7d19942f" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f293b5ccaad94ebe53a3fcb717114eae" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "35350118f2f244206817064d23028e2d" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5830764ff9bc026b1b4aaec3ef4878ba" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b80d36a5ed357e2e20c2ab51e2349762" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fe8d116b4ad441255ee1a388339ebe1f" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "eb50a32d476c76b37ca9e4e5bb14571c" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30162944 } ], "md5sum": "2d7e4eec62301f934625abb08198763d" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fd0d5ed5e9f8d4959e895156855a6062" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6cab3d06bd6a022a23c2a17cd3956a68" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "baed9824f50d16bac73dd3657eba8142" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "598d2b31dbe55efeb02667d3b929882d" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5b70fed9a37affafe7e854bb3c18475f" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8d5e1e77e3dca02d50144c366bee521d" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11288576 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 11304960 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22577152 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 22593536 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 27836416 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "522563b3d5859768ad963ed43c3c655d" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8f71e642a23dd8711d91856907dbe7ad" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "18b0e45d1b38b0fb758af26f28322980" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "692296d4f692b7d169fe064e0489b00e" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b7989017ecf2afce720810ee7cba87a" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "73bac27f25f4c86e011565b814a3625b" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "527c9466237f55bc0453d8f7fc7069ee" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "bcc2b5c46842dbfa819a89b7d0ef607d" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "455777b6451eaa38e60b83c2f55afa65" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "66eac92b02d76353c4858ee129c57041" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4087c12256b06984a47c1017bb7b416c" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ae40161b95bb323b33439a7da29d20c0" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "3aea8609be88a55506774aeb1ed34e1e" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "febbcebe04890a6c98efb88d994795fc" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a3a64adbd8accd85548b613fe94e792b" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1f4a4f0a92dd88896b462b5d4a1e1999" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "177852bc7c200d68b18296f58cdedc6a" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "466bcfb9888c960262c46b7c93d883ec" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "0f6903c0e12bac594df876bebd7fdfb0" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "929de909696950144911b9921d80fec4" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "efe60020ef7221505d56a423050f69a1" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "134912b7dc14d8a9f6905e9924cf0022" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1db63a528c0be86eead1a1a35b9a6130" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "45c9988351dcee54e9925e614658935b" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7491731bc389d9e8681a79e9405dd5f8" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "89571fbea8e53370c6bfed75e04cb83b" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "bc05e86b64a567350875a11159bb6255" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2dda5162c7cc5d50edec9adce3a608e6" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "f5ec1d37f5da07f998328ed32ddd4110" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a29d2b7216acc24dca0fd12b94cc6d17" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 31981568, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9437184 } ], "md5sum": "c98364e4e6ea95b7dbdd1602727c4799" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b65b79a646e9eca50ab30d8a84a02550" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "358be6089c88bea410b11cc8c1d0e70b" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "accf38712a67b3c2b1e46663d294e073" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "f292598077d266a7b7130f46fdcd6134" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3b1350b2b26387685a3b014235ac6cee" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f6dad98c0996d40844c3bc80d4b81772" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "d497454cb75c78835dbb9008ab396dea" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "023e6d25e94def96ab784a425768fc10" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b88e272f883039e8b35300f6fe97ce35" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "15e96bfebbc0c02afab80bc6f7994109" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6290b78a86fd27104c7f2194660e6969" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d27d86d1cf295eebc0504036c8c89202" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8eae2c1c63e1fd5ca8cf172c29195464" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2207ca286047f2f61f539f2771e00970" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "8fcca603f94a08e24821e05dfec04080" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a7a34042c88bbc10190ffb194c87d2ab" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d427f8903fca9b6929f25bfc857ede1f" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "63db146e78e386a54a04401b4f1e7a28" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7d650d8366fd80970836dbe55f13d1da" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d4a9971ee116d3cfcdbb2731c522f904" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "58d57a5d40a1e06b10d8e06155408129" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1479c4ee21e75dfd0379c1b7836123b4" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "085fbf2c615cfbd58889fb81d8b5767f" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "1c5218059bad3831ad65f642befab2eb" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "77b0246d5506ef6856fa3fc18e2f9407" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e1c8a77ee3e945a26e566b5c19462b68" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "bfcf64b35ce3f05f9baa2cb73474a87d" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c51618a193704af9c927b1b5e37dd87e" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e6d22002c82e306544fcd227cd782c8b" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a8d6fc842017ac29cd49219921d53271" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2c95f6e977e6f21df996a9f4cdd3e4f8" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "e85e05bb11c67a499b25257ebe6f822a" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8fd8c638a0699ca082b8368afb7fcefd" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "e850d3043e170bb18e3b41854745df74" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b7cfe36734f23ab94ce112ecbde6b3f1" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a1839ef7605ff2bef70b468a1c707c5a" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d9afdfe6098e885f457ba1d891d18d4a" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "48bc5423c443497ad06c91d4305206b1" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a830941bebba5fd6296fcfd3c61a5aec" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a44219a9e64b329825d879b6363f2d62" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 30162944, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 } ], "md5sum": "9c50cdcaeea38b369b41e8974b96d9c1" } ] }