{ "metadata": { "ParamSize": 405, "ParamBytes": 7322112000.0, "BitsPerParam": 4.500366415925148 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 81960960, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81960960, "byteOffset": 0 } ], "md5sum": "4073142467b271eec22b4addf520a64f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "55aec6357bd7d981feaf1aec684b37d3" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "53658997652208659ed1e8d7738b17a4" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "89147837e11714f1a7a5f042134993fa" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b66f3593a0ab7a24076aac8b51fd84b4" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32896000, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10245120, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 10245120 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 10255360 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14679040 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14689280 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14699520 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19123200 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27970560 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 27980800 } ], "md5sum": "313ed96fa0680cef4277431c5c0baffa" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0ad3d00c200a15b202948aaabe90f86c" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "31267155843a91052a2cd68dec73897e" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "821dbf29f33052983f36bd7932f4938e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "3c9000d3a65dc87c25c18da67261997b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "40d87756c164ebef5b20c650f2bb68f6" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "892a5d0f31b9f10a8974d6f7ee90a64d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0ee788eb4abc58765fd422de8ff11ca9" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "7e3d4452df1f3c3913e0b877cfc81ec2" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "15fea230b919ec30a2ca940676d41cc1" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "be9fbf92261a7c49b83d621c9a25f708" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5a900bc0a7518eacfaf79cb684c0615a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "274409adce1b127084db601566bf34e1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1411dc4c4bde5f946e63e7745c4c7036" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e3cdd59b5dce839f054490e0e40a01f0" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3325d6b810c582de185aeebcce4ce2d1" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "876df54c28a915fe05b491b56e851e9c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "703ba8287633ac35ae6b9e9ec3f4087d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5323f632af4dd998b3a6980cc9b5e72e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "38acc2be04854c0652f26e23c3a3254f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "3911cc48854a956c9842013e78ae5a08" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7e4fe6d3248487ee882bcd334eb7af02" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "828b748199499aab69f922c8b1daf1a0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6a5baf3782ae14f9082224e0ef038b4f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "195d0eb1cdca3d4b93ba9742a1fa3f52" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e6aa0dbd51d5ccb4c8e523b3e2449f1e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e8a63c1bb3cfb2b12463c3ca5f09f183" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "87671b485912b4657f287a17bae10e7d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "15674dc14df817f675da0c42f4a5c5cf" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3fe06ee3c84565e4c8768a94a738dd4e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2e79309940ca8a5eb4743679662aa215" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1b163d9d70f4cbf25ac996f46642b197" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "8f6b777b27afaeffe2b673808acb26b0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 81960960, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81960960, "byteOffset": 0 } ], "md5sum": "55f56d15be3019aebd31203d7f62b7d3" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3141a0208888522b101b88a8ec82b41f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7dfac72ef3bed4954e8ff23cb3b84aa2" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29434880, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10245120, "byteOffset": 14755840 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25000960 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 25011200 } ], "md5sum": "ba4a3e616ae089d3363ca5fb9268650d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "60f539a3d046485efbfe53f3d8cbca68" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "34d5e73ce93d1bc63dd969d246e8e532" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "647c4b419e2de3dc405ef8f04185b6a0" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "929fe70a4a97ae0f81a2b7a7b3fe9597" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "82eb7fe3da250a882c1fc6372809d9f4" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8223955e527848968b6ad6692a9ea9f4" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a262db45d4e5da6f043624211b2d66a3" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "b83deada1c29276bfd99f9018c41a076" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "caf59e693913ecddb7ca360c1de8adff" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bd64c9d45500e55617df0344f8aa05ad" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "55b3ebb4bbbcaa04e775ed1e148e937e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "3d540661bc496fce1e687e34387aadbc" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "858955b4b4b93f1d1c7132e10bf39319" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bf36b74df3887777c15fd50f257d23f1" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "737a459e9093ff85756c3e411c63fe4c" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "6f26b8f90da4df7eed8fbddbc1e0e60b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c9b8009bb7504dbdd59e0450ba5c5fdc" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2bf4276ec32b69570defba35f0aa8c47" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4fdb41e22024770552fac89fe1d60686" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9a8e1e4a61ae23e833cd90719c0ddc48" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ceceef2ec44df087af87bc475c3453ee" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0fa67c7bc6e2372b08f7d89fbd84640c" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eddacbcb02987630c61df7f0c5064aa2" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "3ff048d135cf3d25110a5e3adb6931b7" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "12911462a0ca3be9e2f42dc6b712fef5" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "dbb926cf39ea6b4a514ad6dca46a8915" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "902812e3738a645a11cf41032134513f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33443840, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 28518400 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 33433600 } ], "md5sum": "aeefb333c09c319a7630e3460391597d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6dc7ac4beed2f93db148306867ef31d7" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9848bc2351544bd4e7d46006447efb85" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7cb3f81fe4e4577f3b9a6da77db92645" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "db6a87a6bc6b4c6afd62200705f9ae8e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f06b2db535925e243394152f096d4d07" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "542c1c85f1b16ed34f2876c470b96d8d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "56fc0351d5c046d3f34078a14a063b99" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "5924bf2771a469f8fc41bedf9cbcb9f6" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f2cfa3e2ca1674fbb1115c36ceb8d312" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "825a072c2de7022335856041152e47da" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3107a382c4d0ddc8fd26b8cbd49bb582" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "4e3487218ed472e5c5b1e6f9944f16c5" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3dc170eaed937154010399eb644fb096" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "dcef90a792e01364433ad3acc9616165" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fe863f81d5d29228ef068847fab67c9d" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "2cac949178271bc758b959d02b66d28e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e6f9184e7501220360b12fed91e9c41c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "84bfbf4c88924c7cb700b9fc42b07a8a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "35042b4650e5cf02f7f5bdbe29b6eec6" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "ccef4c858bf6aa10298553821883daf4" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3893165fb517bf35df3af88d14b95c0c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1bc7d99bcba46697896f0f28849fb1cb" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "eeb1137c6b62ce8a7ec05e8457d65b1b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "f14fc51e9e702edad526da691a8903f1" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6bba09e090e58aee56a39cef90d56eb8" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "217d59720e3a5f97d693b20671d6272d" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ba126abc9eacb51d7af6a5a6f5a1bc5e" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "e3cbf9857a3ba9d0159e067e12f36fdf" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3f45513abcedc6da8a7b77ab38a05465" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "04e9df5c42d3ee2171293f6dc6bf0d16" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3944c4df74036701d5fed9bf75508119" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "efde65ddd16c6b3c32e2987e67e6a1a1" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "97daf35cd859e2bbbe963ea3f6b990d1" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7fad816a930bbc7b9ac7327d02f12741" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a583de836068776c1697df60947a2d3b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32460800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13281280 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26388480 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28037120 } ], "md5sum": "35eb6addc99689f98a89c12b0641342c" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "36339e51807fd088bac24fd14e015a4b" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "123d06b2ec3dae7694be30e19d519281" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "402575be75c1388bb223a31d9d688a30" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "e9d49584dae276717620e03de00135ce" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "db034f359d250af29630f99ba04ad01e" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ee8eddef397931df15aa024b6c1abc86" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ad5f0dd29b78c04692ce2acba45ad490" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "3f59f0ff76cabb1a6109f6ac6d1c4da0" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "14ab7735130acfa139e651af21fa89d1" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "49a5974bde08d406ead78bced24778cb" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "29e71e7bef5340b9462fb5201fb2e437" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "09288dcae3c0f46aa7c2d86386c80d76" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c2a8d008c7ca9fda787ab785e5e098bc" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2c7129c84f955c0323ab280732f16d34" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0a5ecdec73798cabc16490559142ef51" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "053cdc9278198e855d9571cb88e6c39a" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9de408494717b1e591d1ffacc65256d3" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "73a26c3f1cbb1b876e3f2c37c7ede188" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d05191a9c608814746b01e3bebd2f124" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9d981af6f26cdb4eb302639861185606" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d04bdb5408fa2cf08e28f22f6e7f9ba8" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0596f459d0492364c40e603af9d73c6a" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c442dab0695ea5f54608ee947d631381" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "ba0fbe418a8784564fa542826438b55f" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d94fd94761e30d8aa51937e8bae8a27f" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a1f0a3af4d3a8a5db90a723f99e9f2d3" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "998b309d939d36e68bd4c59485336668" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "dc392fb61dfc3251cd3fc5bd92a38cc0" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "fa60bce875e75e76ab5d478be33ab41e" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2fd864cc688380f20a13f0baac8aee66" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "80d5ef8012bf9aa03c2a13efed93d464" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9cf86e28c09c91012615c3ebc65872b7" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a8b9e52b1f7cd93e9377707684f3503f" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "69964ad2cc2032a1d0087b60122c6e88" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bc33011e9b2f09bf3ab29c76b76af1ab" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "80b0bbc992956aa91e4df4532dbc79be" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b6b9a5e0c8abdbd9ade6f4818b12fc2d" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d23f79a79cbdb55897d2620e7b215c23" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0334d968748820f0678e59831670d78f" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "a459a87c6718d47a08de182d4d6ff981" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "bb7f19ddceb1bf15cc9040e64dea96bc" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "86ad1612e5c96cbb63f4d1e306293cd9" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ea166dec252f0bb78427865fbb5f3524" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "62ad3e5fdb635150e0ad1bbfec8ee153" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "8d894151163fd78ea7e5a33bcd9a798e" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6a9bbab246fdb4d4e21db9c32ed75e9c" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2f8d0b4527a002c156e7a16ec1662ef7" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "697ec901a67d74cdc3fad331bf1962d3" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "cf32fbdae012109a3f3bc1547fc08556" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "24d0e3e9d1b0cf7684d80ad1900c60dc" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e073dcced17ea63515b62d9fe4b3b367" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "513713ec479a0ce3e2dab095ccbe5b84" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7d0cc88ee8b5f7cd06cbe992f96e8b51" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ff6ac82ce082113d93978fdeaa99c67c" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 28518400, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26880000 } ], "md5sum": "69a9d21b97d30ced94ea1d1967f3b58d" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f5a623e66dd0f9657067e36de80a7e46" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 28508160, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8847360 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 26869760 } ], "md5sum": "0c2fa2268630b1145b6a2b7026f2a980" } ] }