Qwen2.5-3B-Instruct-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
31f3539 verified
{
"metadata": {
"ParamSize": 254,
"ParamBytes": 6171877376.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 622329856,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 622329856,
"byteOffset": 0
}
],
"md5sum": "f58c0acb41b85d8e7d5f5c53d9501526"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "0bfaa9620d061038eb70bf4c0188c447"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "45649b9a88a54d3f210ec7c6b762aee8"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1fb6637c50eec204f716a6e2b7004948"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "826a05e4a27388aafc5e4a489f6ab271"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29386752,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8192
},
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 13312
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10499072
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18887680
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18891776
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18895872
},
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18900992
}
],
"md5sum": "4965e678f27fd5965a9dc4b529721a61"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9dfa1476617ee80bce853a1d126ef23a"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e497cf0aad47d8ef2cbae595c67ba1e3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "73bff5ef420b7df2698ca90bf2d4df51"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b67ddd9c92d023512982fa3830353421"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "1688f84e4f7922f76817b6bb23ac1384"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "de14a5223492bc23ce31d025d7344400"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b6672bf8dba2cac2b56ff2a25b32e42a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "9eae6a11b421f3ce61171309b12aed24"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c0a03bfde4dbb8e009498dfecb3e8265"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "002e4817886977ee630d7042d6d5a09b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ace1c86724325188be11dff0efc4ee5c"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dd6107ab54b454867c2be25833ceddf5"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "29896a6487425c50ce734d63c64a0045"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b9813a99205422ef528dd478f0e0426e"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1a9ec90c169d1163127c0060ade7e033"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "b815ce34bc6d755e86aaa243d10e785f"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5bfced7df7530f5b0e2b5dd35d9e483e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a4c0ff835bf6654a4adde03177309a07"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c5eec349a32db23bd55b8ec57df3ab41"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "88771c127d151a4f3980537e800dc1a7"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "a61edfef11446ca99e769e1df08e1267"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ac2fdf4213976668e5aa80b1d262b200"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "73d57042875e2e07e4b0f609e017c735"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "3abb7cf0a44a0b73e5137b61d1dabddd"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c0f9b1d551c5f23d140dbdb87314eb3c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "04f9bd1796ef023f16c8f879227f18d8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7bc4817941104c75718246d974ed5193"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8e9a521dbceb7b63b33a46b7c5280dae"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "db8679b3c3a963a2ecd1d8291d4a0cab"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5f68575c10b75b3a9c0e4f1170f561f6"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "74c5a55fb43017a54caf745862f3a5ff"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "67cbfe7a7e0885134b290b5c53a399b9"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "214347f2fb8081a59b2c862cc9219e30"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e4a4529f892e439e12c31bfb13046593"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1a1ca8b9926654a6a926464cd0a8c964"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "b3606dd298a4e5bb260e7d70987b6694"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fcdec8529d65079960749f437f2a4f79"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b85856695ac2cc6b23b43984d4246946"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "00ae1c8d4f65260416161a405e1481ee"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "0ddf1424188e8231e7ba3d3136bb35d4"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "098629b017a248912fa17e9032ccbe42"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2fa47fd530a7896a5d3f164798004f5d"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "02c980ee910a4030b28037c0ffbc0242"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "9d325e8a401014b4354d693f03b067f1"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d7d9998f5372b2a62e6700e14dba76ab"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d5f943fef4c70ad775575f0b9272269a"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "821e893e7e7007d1b2c54ad1a1e5e409"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b5c48f3ece0b879d6a2552db05b19cfc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "9b819fb21b6a3b345e1c27f7e6d71e9a"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5f909663574916741ceed1c39d858df6"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "779156a50a2d480c5b03179af05548e6"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "4f1d844111ca65fdb37ee3a1a7e84765"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "16fb473b08fb56dd88d676eedb48193b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8c0f189b6ff437d8519774cec2d07753"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "88e2891c863f2fe0c52f2d56ba4f795e"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "2a7200dee16602ffda10b634e5f5b6e7"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "306407ec30a7cf3667b48023b469eb8c"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3e9c86504bb7f37a115b73b2a89ad9ef"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "6aa6c27ceec822684eb2947e6ab6c844"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "776b5452c9e1347a040b3197973a5bd7"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "b8cded6bb57028d8caee5d16685eda56"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8e00da96cd0492374f4c113556feadd8"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5ff80e592ca02bcc840f2c45ffb377a3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "6000b1252c865c1244a2bae136d7ef20"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f8f1f022388cc0617f8d740c0fb7dafc"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4b4e9ff85306691dec1ebc118da45aab"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8567f854abc6dec3f694600ef5de6cb1"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "306fd8e1d1fb539cb4b0243206b70cf9"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "96cd5edca290c894a0b8556be3db83fa"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "6d7eec6feefa599db05df37bdafd0942"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "9c5eafa3521197bbe7e228bd348cb615"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "e686501bcee88364812b9aa417379861"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "25a1442240f3548f9ed920138d6f2302"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "649c542a832e9f48252b1e9837f813fd"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d331c2101d88423c9f1666a2c683c22b"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e4975ad33de1bf0d90a1473dc4bf930a"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "6bc588c7e64f3cc48aa39089ab27d9dc"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "854d99689ea537c3f5538c3512908a21"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "729f4f7b87185ef60bce343e60e219d9"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "c6eb60e2bb0ae2c5fe0856c13dc51f34"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "59b8945b6e2c7116a9e142c52b4f2a7d"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4860f118c1b7a21d144ffedf118af89d"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "3eced39182906479bc26b68da5f80148"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0fe17a873e5d718536793729d0e2712e"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "40c0dfa72b018a22d11d30a45e3bb978"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9b73caae98cd7e47ebb08c804f468df2"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "92f1f90019615d78f7f6ef17873e477d"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "4806c3a4257b09a7a8dff09505f59605"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1de221761f59f15b8d24b05258fc6f89"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8f79a06439059bdb5f769b209f66aa4c"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 27280384,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
}
],
"md5sum": "6e3f4ecf3c3a86b1f33b4642e6de9869"
}
]
}