llm-slayer's picture
add model
d7f10a8
{
"metadata": {
"ParamSize": 147,
"ParamBytes": 2690863104.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131080192,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32002,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131080192,
"byteOffset": 0
}
],
"md5sum": "5236df4d04f62453ab07342ace9ac63d"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5bbb8806c5c52f5b8fa1204aab8174ec"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9a9ced9b15e7fe96449ab9879f39a7de"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c2fade5225fb488d6334c56610491342"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "749e32c97aad4742b9e90caf3cf3a341"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9ce0bf4542e4fe97ee580425d199ee8f"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e5095112881d0804c94a8b084a6a9733"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "231a5cd144ccb30ae318314b23608227"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7d42e4c15eeff80f77e52df4ceb2bf43"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0abaa24f053f587e9edc818c28011802"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "f949015f98f9bba9f0df2bf776613f3b"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ddc0f21869ee787e61240e00b7a60d1e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1efb1abf6f59a5797f8be4262df36e78"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "0416f555eb82990a5b4750c73f3971b4"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fde70d3fbf23e3964a59160f25bef5c3"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a88d63a90234d431fa022cde58cd6558"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "849bc602707e649cb03a3a507adf21f8"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "615449ac000262083d5eb3f9b4009385"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "37acdfc860edc57092f7bcafd7f6a477"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "5c9e8abdddbdc09af7cfa13aed17a71c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "44711bbbdd19bfc01307c09d06e42203"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e6d2b5932ccd2183c429b292acd04d59"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "25ca45812d1c73be8d1d63539f0e2f10"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c4b9f9c8868f00e930cfb9aedb21f97c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0f04b837378155f443ae22913cb03317"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "0dd1c4f443e8f56523af21af40a04f0c"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "42c4a6bf934639f00ca80ed51847dcd1"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "90505af0ffe93522225df2e56bc0aab6"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "45d54535e2451ae43f699ed32ecd6ce2"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b80d76fadef6c9b41b4bdf827732e61c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c9137c3f240c15f682707d7fa6ac3ddf"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "6a3b1c5c34e386b6bcb9a3ad29668d09"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c390e99990a0126f98d61f27ab2c68a4"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "612efdbcbe7541a4244a76d6c1c8f484"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "30fac35a87a1e5628baac643050bc09f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b99ee6242e460e82dd53143c91c45f7b"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "91f6d3eb98038ec2a068a915f3f14973"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "ff414be19f0b4d6522885e7729f1b203"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "a17cba4e8a82a54ebb7bfa0f67fa7f91"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "db19e04c8ffdbc2794a128855c5527e2"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "60d9d5eed2b02fd0f12e0f2a85cdf1f6"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9cbe6a6ac2ec3abf9902f88a2eb9a237"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8b404bc762dd275697b79df4df22f9f9"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "731b40cd63d88cdcbb65007b1fa72174"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "397b3dc04a75979d4866a04f237f49a8"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cc8933cb7a1e5b2a08896c5fe577f405"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "71c0be626a091a10f7535e124bdac1d5"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f322f06b5d5403f2d34f38d4ca70b2bf"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9a01f6391a3122b069730a70ee48b125"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "6ee9b8b140875057942405392bdadb60"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ab47f53c868ec49a3a5002e90e673c29"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f4d03d837be7afe115242d5d6ab9f1fa"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "89cd4701668a770f350c7fae062a1387"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ccc462a320c802f427b378ec82814f22"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d462e5fa68344d246660bc20a60313df"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "4c50c77ae659de0aa23f697b7b5d2460"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fabe7dd8ded46df7fde7d6fd93cc9ce6"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a719eaa0e0a9ecf9d094cc6aeebea333"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "95825020ecb32bf61ef3e1029db7f353"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "621f309124cedaa397b831e5f0f1cf5e"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3aae87d5b421d59211214a94b4000103"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "f5b77c5932d669f4f4e6cf0faafd3c89"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b17e8a72ffe031972f0bbc41c71ae713"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "404d21fc88e458be85b543de6ac7fc76"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "d713ab10d732a2165e1cbad5180ed82b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ddc577228115c8d700e146100af96530"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9afea93aa3ac17edc305aa2cd8f16fe5"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "8b1a597873d1fc6155871575d6419e6b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "34e19f6dbfe82b2e56b4f9deb05d5da7"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "53cabf917ecd982a1068cf13390d121a"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30941184,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
}
],
"md5sum": "d862984cd4e8ea226193160b190f8827"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "95d1228e5ceb1f8a5f3618cf80882b89"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 131080192,
"records": [
{
"name": "lm_head.weight",
"shape": [
32002,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131080192,
"byteOffset": 0
}
],
"md5sum": "35f2374b89fa60891773b4fedacb8537"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 30945280,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8388608
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30932992
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30941184
}
],
"md5sum": "ddff1a7887347b8a3c9b8e4ca23e3e65"
}
]
}