{ "metadata": { "ParamSize": 357, "ParamBytes": 26272210944.0, "BitsPerParam": 3.711629079784502 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "f084dc6992d7b9cda35b0a08e67749c5" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.30.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6694945f217b115d9d61add41d3f749e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "23ffbf67c0a2146702affb3d24113d88" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "898ee52b7c6ca38f3b619e44e7e5e783" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cd8b58ce499f1fd9728e66ec590c4766" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.31.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "213ac0cfa38ed1da04ee95d9f96bf976" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "efbd93ca2c2915f88f717898c3924995" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a9551a4592037159776ca6cb57624ace" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1c8635fe595953945f19295c7cf942ef" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "5786b467996342a94d9a4dfffddfbc9f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31891456, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192000 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8200192 }, { "name": "model.layers.31.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 8208384 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8273920 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8282112 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8290304 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 20873216 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22446080 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30834688 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31883264 } ], "md5sum": "ee801c40285570d08d706afa2d5d11c2" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.0.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4a6e03aea0e7d134800cac4d000ca7e5" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5b57c83387a1c8f0a26a0d8bed4ad7de" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5f9636c51cbf1d44efd07a9cef29862d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f707dea9c5dc6bf53a1a9ff44037755a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.1.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "cde0279e39f4384e7951fab2719d028c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "10210a47b0eee9f0da0ee73734b2ec4d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "46dc0245928aeb0f36a8980b8c4914a2" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0df31746fd968763e17a06d24b549cdc" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 31932416, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.0.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 8192000 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8257536 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8265728 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8273920 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 20856832 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22429696 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30818304 }, { "name": "model.layers.1.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 31866880 } ], "md5sum": "a6f05ea9635e7573129066a588b81938" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.2.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "dc1034c152401224773974e9769e0d1d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c9cb8d09cbe5e9f1ff8eb6650e847192" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11362d56d228d3446429750145988b63" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "42f3ca73afa8531c0c02495f35dc995c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.2.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "ae65d646920213464e62e24801ed9f25" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.3.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "09add6a8b89325ab9b13f3b7ab4627da" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "48da5b5fbe70bbd985f884111fd73d3e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a0eb4a475a307c2ce86178d624856aac" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bb462f70e6a01c0c60165aaa3856c4d7" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.3.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "5d61d616a32dff6f676b2b1c84eb6cf6" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.10.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7c2d372ec8eaecd5c566802a9ef672b6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6a64303b4d5aaa89617b44c2d52070a8" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c0856ee8654b07ec5f228997c3e97fe0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b8a698c23b5f0d43d22eb8df8014e91" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.10.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "8efc14922491f7d8c7258150ca7b591d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "49bbf6f3018719505dfa41105ea45d29" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "93bbaaa323aff6ba5158e58d07c1e4d5" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.8.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "d5401eccbea1cf163ee586a11221bd89" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f58fed06a3e10d6dd046e9a03b592097" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.9.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ff69a35b0ada0865cefcdb0cd6e8fd97" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6b3d6401568d11c8ff854eb524c0222a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d0a467f150c85f78f34c490ff9e0d48f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b9a025ad3ceb4aad0d4ce46f22b762a6" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.9.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "5a6529f827c4f0debfcd0118196576f9" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.11.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0cd07bb73ea20a74533168b847951647" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8cee07b4a79151dec86c749864e1240b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "47139f9a5aef4978d87aba9ddd5e53cb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "65e8556e6aa53cc1df59e01462d9f835" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 23674880, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.11.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 } ], "md5sum": "173c26d0cb09387e641e6605ce039864" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.12.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a86b34cbcac452a9c95ff906e3895317" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f556df7a75dcdb976e42a947994ce739" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c3d35c112ac4d987d16c9430894bd3bf" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c73b4bd62c19cd7eb6fd20619ce51f0a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.12.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "4421f3921a74b29ef71cc6d417ced955" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.13.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "82278f0c9cc9cddc199f9c0b4e956bbe" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "34bfdf1119e1cbe31e3f08c2e27f147f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1a99855adc0f327c0973930fbab872b9" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7db6259ea5a9fc64fcebc26a7d329538" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.13.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "e5c22ba257e1cce441a6190d9b7ca3b5" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.14.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "eabf51e21262988f0048b6816ee339d6" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c099d63f696cf0249f8349dc07f55dd2" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2c4e5be5c7c6dafeb8cb6f002c90f334" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "578dc84b751843c1f6404f08a81ab1eb" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.14.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "421fdfa8a7fa17da2221570d3f89c7b7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.15.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "09b4f9ca06e6ba8b0c384bea779a9606" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a1ebe677819d01ee46ee8d78a0d9169d" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "71207535df5e8cdf7ea5aa2e29132f3e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "caf35feedbf7a979fd1cdf80f951b4ed" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.15.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "d4a9d6b303f3b523d6185dc2ad04c69c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.16.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "fe044fade8def41c0ff0ec6393b7d528" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "99a39d409bfcd853c722503375174680" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2723df0336f9db7863e77103f7936148" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "384f616ca117317dec7cb06d4f72a39f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 23674880, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.16.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 } ], "md5sum": "cb8406798ba2f5b3eb3c91091457bd4e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.17.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "dcd989cbd29bad80b14378fccb50da24" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "aed1b1b5e8f9e4fff668d3fdceb71b25" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "40b124b535a49d199cab6511ca9d9cd1" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8f3ab9c711fe470163e72f4cd6995930" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.17.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "e84a9d2baef0cdbf36cb71aaea73bc56" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.18.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3a85cefe621a0811cd4086c6fe747f58" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f7ef917f141345aadb9e806f50f1d21f" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a19b349c8997ae9973d8b35657f1446e" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "162f62f8baeddebbf6c9e65f900a88bd" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.18.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "56b7aa17f7e45bf389b5498afa892f22" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.19.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3cbd4d179cabddef49d003ffe1bdcbac" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e420cf490657200f53745c82179caef3" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d75ee4b674c714992e8ed8d2ec4dbae4" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "68c67d044fa3c9a4cfb5f504236ee1a9" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.19.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "3549d9c659990885667dc4bd609cf076" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.20.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "534092fc23d3b0ce0aee347f424d069c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "359ecd7e5daf9e890d1ec2999506a352" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "542c327fcc15f5a85fc09182d13cd3e5" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c59445a431259d6a2416ef91d877478e" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.20.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "34afe25ef785e653a42efbbcab096d69" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.21.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1a1efe0ef75ff5ef57b5a2aca3055912" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4326aa1a10a88b73cbedca1ea75bf4b6" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3874d56d7d5fbd6f3cbc1330a42c7323" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "99904688765535983db02d7e52a6f44c" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.21.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "6a831c4fd7ccf03a7a6cdc40bd230f58" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.22.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0bce36079d8f66187bb46541d15567d5" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1afa300343dbe4930c831fc4df68c5bc" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d8667d386a2851b94c2411bfbe574d4b" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d91a4ddd5b9bd277f9a9a3e4a70af80" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.22.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "c26eff0d35583beb2a36591e4008a9b7" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.23.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5a3f11bbe4c1a4b45b795cb9e6a6cc56" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7b3feae474c2e4470285b7df8c1e7ed9" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "65607c6b8fea09bcf69885a7b1b190c1" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "50b93e2d39b3db40847825b43d70c12a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 23674880, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.23.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 } ], "md5sum": "d7ed2dfd80ec7f5a8426c1a82bcc4a03" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.24.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e2cdba1e90f848b511082d2a2c302110" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "690a6c1ccdc114fbd698adcc889c4100" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4fa1948d598e121066632a72d9c1aafe" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4a5abc018a50da0e4155643bb57e96b6" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.24.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "e2b3413adb86e12c65526f34bca62060" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.25.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ff700cba2578659dce26e2e49cb7e332" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4bf529bde7c2ab1ecd7515b2a626d941" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f41999f7613e52dfb6f1c6fd479f75df" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2ab691cec5ab9ac30e66bbc08505e5d5" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.25.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "d8129bf3eb41d835e84ae4a557db4cc3" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.26.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f69a4cfe251d8b889f12947fe3acbd6e" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3618381a4e821b0e8664ced6c04ee53b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a68028c14cae996cdf983f9559d616ab" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fbd3917a31ed626effde97fce51571e1" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.26.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "5a17ecd6b0928f308268c7d8b27f9ff1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.27.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f54739797454d03e5cb827e98045b285" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1eacaf709acde4eec01909383abdb277" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e0834c1975bc9462dd797dc9635aae55" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3a9aeb917a11ec2f9a70c677f1ace648" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.27.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "ec7f60b6073925dc0c7311890bd2e0be" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.28.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "96d1c187b6b17b6e22ecfa58b3fb8dca" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "598fe22c2571a90d800cc875ff487ca4" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1d0c2fa805d1b9e4dff3a1eafcb84177" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d85020676dd429ebdb4c24852a863af7" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 23674880, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.28.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 } ], "md5sum": "a55664f8e3d65f6f9240c3d7ab420bce" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.29.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "80ce0db7f60f6fd2c32188589865029e" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1e8f86847060eea69798434184e8156a" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a3585f8f6cfb4abd8702dd6f0e714c3d" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3f48cb80d5a79cba9c7fe37e25133ab5" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.29.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "c9fa3a0a243980e95ce21ff15a8f4655" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.30.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "275cc0234601262d511e7c088652096b" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.4.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "d24b117d5691a9f71f624f3f6c6116cd" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bd48d3f4fc1241c1d7e115f10a47af6c" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ae19af7b4903c6393f48d2fb5bca0d4c" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bb0f2618aae72d82b7bf460ad02232fd" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.4.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "30483c7e85c8568e52a5a5ada8b72e68" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.5.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "48b203fd94b3bfa8f928d85a02189230" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.5.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "87b1d610b25a6bb06650f53406bc288c" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2f112274f5d4c5abf1890e3c89a50e94" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "af7dd569e7ebf62b0b83d29d6a7b9ccb" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "112862742cc33516ab6a6612b3e309a3" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.6.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "392dfd664889f6d079d1a58947d44474" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cee58726a69efedfed9a3a6d234e271c" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25eada4ef6a5da26fce63fdf7d54b3c2" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0793bc3766012e85424e0d16eff0f925" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 23674880, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.6.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 } ], "md5sum": "978b4f47cff5f309b7b2b61c04f78958" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.7.moe.e1_e3.q_weight", "shape": [ 8, 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "bf0b1e9bf0f2777eedc041e2739574be" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.moe.e1_e3.q_scale", "shape": [ 8, 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "78c5318198936e3f816afabb242ee16a" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.moe.e2.q_weight", "shape": [ 8, 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2bf0d254956f30039ad0ab0e2ce21261" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.moe.e2.q_scale", "shape": [ 8, 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "378509a1a5d221955178a17c8200735c" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 23691264, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23601152 }, { "name": "model.layers.7.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23609344 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23674880 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23683072 } ], "md5sum": "8ddb96c3b2725d63a3d972c12e4f85b5" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 23658496, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.8.moe.gate.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 23592960 } ], "md5sum": "109fd4f9a5a5ac44d3cd7eacb7dbc3ad" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 23592960, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 } ], "md5sum": "d4b9e4d0210f965a364ea4778a76bb77" } ] }