{ "metadata": { "ParamSize": 147, "ParamBytes": 2690863104.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131080192, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32002, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131080192, "byteOffset": 0 } ], "md5sum": "5236df4d04f62453ab07342ace9ac63d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5bbb8806c5c52f5b8fa1204aab8174ec" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9a9ced9b15e7fe96449ab9879f39a7de" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c2fade5225fb488d6334c56610491342" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "749e32c97aad4742b9e90caf3cf3a341" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9ce0bf4542e4fe97ee580425d199ee8f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e5095112881d0804c94a8b084a6a9733" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "231a5cd144ccb30ae318314b23608227" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7d42e4c15eeff80f77e52df4ceb2bf43" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0abaa24f053f587e9edc818c28011802" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "f949015f98f9bba9f0df2bf776613f3b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ddc0f21869ee787e61240e00b7a60d1e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1efb1abf6f59a5797f8be4262df36e78" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "0416f555eb82990a5b4750c73f3971b4" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fde70d3fbf23e3964a59160f25bef5c3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a88d63a90234d431fa022cde58cd6558" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "849bc602707e649cb03a3a507adf21f8" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "615449ac000262083d5eb3f9b4009385" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "37acdfc860edc57092f7bcafd7f6a477" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "5c9e8abdddbdc09af7cfa13aed17a71c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "44711bbbdd19bfc01307c09d06e42203" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e6d2b5932ccd2183c429b292acd04d59" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "25ca45812d1c73be8d1d63539f0e2f10" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c4b9f9c8868f00e930cfb9aedb21f97c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0f04b837378155f443ae22913cb03317" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "0dd1c4f443e8f56523af21af40a04f0c" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "42c4a6bf934639f00ca80ed51847dcd1" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "90505af0ffe93522225df2e56bc0aab6" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "45d54535e2451ae43f699ed32ecd6ce2" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b80d76fadef6c9b41b4bdf827732e61c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c9137c3f240c15f682707d7fa6ac3ddf" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "6a3b1c5c34e386b6bcb9a3ad29668d09" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c390e99990a0126f98d61f27ab2c68a4" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "612efdbcbe7541a4244a76d6c1c8f484" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "30fac35a87a1e5628baac643050bc09f" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b99ee6242e460e82dd53143c91c45f7b" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "91f6d3eb98038ec2a068a915f3f14973" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "ff414be19f0b4d6522885e7729f1b203" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a17cba4e8a82a54ebb7bfa0f67fa7f91" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "db19e04c8ffdbc2794a128855c5527e2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "60d9d5eed2b02fd0f12e0f2a85cdf1f6" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9cbe6a6ac2ec3abf9902f88a2eb9a237" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8b404bc762dd275697b79df4df22f9f9" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "731b40cd63d88cdcbb65007b1fa72174" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "397b3dc04a75979d4866a04f237f49a8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cc8933cb7a1e5b2a08896c5fe577f405" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "71c0be626a091a10f7535e124bdac1d5" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f322f06b5d5403f2d34f38d4ca70b2bf" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9a01f6391a3122b069730a70ee48b125" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "6ee9b8b140875057942405392bdadb60" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ab47f53c868ec49a3a5002e90e673c29" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f4d03d837be7afe115242d5d6ab9f1fa" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "89cd4701668a770f350c7fae062a1387" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ccc462a320c802f427b378ec82814f22" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d462e5fa68344d246660bc20a60313df" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "4c50c77ae659de0aa23f697b7b5d2460" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fabe7dd8ded46df7fde7d6fd93cc9ce6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a719eaa0e0a9ecf9d094cc6aeebea333" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "95825020ecb32bf61ef3e1029db7f353" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "621f309124cedaa397b831e5f0f1cf5e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3aae87d5b421d59211214a94b4000103" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "f5b77c5932d669f4f4e6cf0faafd3c89" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b17e8a72ffe031972f0bbc41c71ae713" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "404d21fc88e458be85b543de6ac7fc76" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "d713ab10d732a2165e1cbad5180ed82b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ddc577228115c8d700e146100af96530" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9afea93aa3ac17edc305aa2cd8f16fe5" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "8b1a597873d1fc6155871575d6419e6b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "34e19f6dbfe82b2e56b4f9deb05d5da7" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "53cabf917ecd982a1068cf13390d121a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "d862984cd4e8ea226193160b190f8827" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "95d1228e5ceb1f8a5f3618cf80882b89" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 131080192, "records": [ { "name": "lm_head.weight", "shape": [ 32002, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131080192, "byteOffset": 0 } ], "md5sum": "35f2374b89fa60891773b4fedacb8537" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 30945280, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8388608 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30932992 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30941184 } ], "md5sum": "ddff1a7887347b8a3c9b8e4ca23e3e65" } ] }