DeepSeek-V2-Lite-Chat-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
6751d10 verified
{
"metadata": {
"ParamSize": 324,
"ParamBytes": 31412968448.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 419430400,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
102400,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 419430400,
"byteOffset": 0
}
],
"md5sum": "8efb052ccaeb3fe0dbaea6f9c829b02e"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 419430400,
"records": [
{
"name": "lm_head.weight",
"shape": [
102400,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 419430400,
"byteOffset": 0
}
],
"md5sum": "4a8627915485ce09d766748c2eaf73cb"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 89653248,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
21888,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 89653248,
"byteOffset": 0
}
],
"md5sum": "7dd211cf2b4ba021dc88957da62efa8b"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 44826624,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
10944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 44826624,
"byteOffset": 0
}
],
"md5sum": "1635cdc76da038d13d988f8ace5fdad5"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27538432,
"records": [
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 12587008
},
{
"name": "model.layers.0.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 14946304
},
{
"name": "model.layers.0.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 14947328
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 19141632
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27530240
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27534336
}
],
"md5sum": "94604460dd83a6fb5270a48f573f6752"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "1d2cfb5622ce34e22e5ae4957ee157de"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27788288,
"records": [
{
"name": "model.layers.1.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 14942208
},
{
"name": "model.layers.1.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 14943232
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 19137536
},
{
"name": "model.layers.1.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 27526144
}
],
"md5sum": "62b69b1ec7025b08abe78b71605dcef0"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.1.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "0d385052823dda4d013a321974973bc5"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.1.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "7879f962cef2ab4a381f9661261610da"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.1.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.2.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.2.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.2.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "ae252989f064e2d6262194cda9d72ae7"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "9e753070cd9932604081fd8d4f8d6bb7"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.2.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "ed63f9bddb6b846adb976f22f6849c57"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.2.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "8e02769b1751a12b2d6fe0ec659e411e"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.2.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.3.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.3.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.3.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "4c9a575ac29f0912d0e8ffdebbd0a0a7"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "ac951ff044083478e8003210422bb563"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.3.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "af3e86be02579b3ebd5d5bb2c29363dc"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.3.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "532d5d8699b182ab3946c316e2d5263b"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.3.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.4.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.4.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.4.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "6181ef48c120f471179c405ecea2d745"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "9be38bc3c7592a96b3a9e2db17f41194"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.4.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "fbea4f36bbacd5f21d6ba0b21e94ad3a"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.4.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "77dd723ae26c53f97e89686ab1a78619"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.4.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.5.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.5.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.5.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "e06ba2c520e9e8e88e9281a0f3e8d36c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "a95303fe48fb1ac354f0b892f72442ad"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.5.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "2183838b88d8e2dbf42fd5e66a85cc13"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.5.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "765277f302d8375301976b81a0190f56"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.5.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.6.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.6.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.6.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "f5ac7d4e7504426086402bb9e56e4f2d"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "f03fed09dbb39c9cc027d956a073238b"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.6.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "1b71f705ecf9afb9d9a20c160ed01309"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.6.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "5eae4096c3981cec9178b6439b6af1c7"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.6.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.7.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.7.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.7.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "50e2ea4e7b468d3a097a32eed4eb9640"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "de7ac6b69157d9e17fbd0f154f1be493"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.7.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "f3bad9a2d2bb3c12ccb9fb8e808ff753"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.7.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "5aa5f6beb538b72b11a361cff10f5218"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.7.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.8.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.8.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.8.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "61a8df6e9ff40b668dc98854175a1905"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "bde648537a1cab7e836ce4b10abf976a"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.8.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "91fcb48dc0b555b2377fef861deae02a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.8.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "60201e91d1e9379301be2b0848e22ebd"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.8.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.9.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.9.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.9.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "162b5f3e8940473be617c3931a32b13a"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "433d0b07f9c2918a6c9ca699783811f4"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.9.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "3a57e4347113d8dd6c121e8b2338a11c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.9.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "f1e598f998ce420c612a30ef5b91e3ed"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.9.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.10.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.10.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.10.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "ead6c9e151f9070da55594a35e1fdf28"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "64d1f10447145207fa6d5558be3d81f0"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.10.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "4f7679c3c359dff9b6b3664d056853a9"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.10.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "a19d8185410a2952c77f81499606cf1e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.10.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.11.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.11.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.11.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "7682e0bf1ceedf464967c6775d0a6440"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "7d055e48551b487086938e176cfd5933"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.11.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "e7b76dc145f8beaca07dbf23adf94db5"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.11.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "aa6952d761db9bb571ba5b5ac0e26d09"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.11.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.12.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.12.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.12.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "64df32b84faa940dc5f062df9b0816e1"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "97d842c41752052c5f22f39a2bce18c9"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.12.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "920fed6b8d41bcf759c27b202bc20779"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.12.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "e8593dd181fec8ae648689db895ea6bf"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.12.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.13.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.13.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.13.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "76022278d2c3ae428e6aa302a66268f0"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "385c7e55d460c8958465b8b6ce74555f"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.13.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "6df68257505e91417cc9b5363fa8f6db"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.13.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "c35f40ea0a1a49170c2956a1c7daf0ef"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.13.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.14.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.14.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.14.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "7d1cacd90f7fa4807bfd98bae5cac89f"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "021f29d8ab1ad91e84c1fb23a09c97e5"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.14.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "b6571ad7b66464514a0092c4935d33b3"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.14.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "38fc228fe8fb956b00c9f7a4cd3d0763"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.14.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.15.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.15.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.15.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "172abfa0a744c2c47066e3c287a5e181"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "cd644fc4266fcb1cb8ee6641c40ada1e"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.15.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "024de581071a63e743015499c6e939c9"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.15.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "872beb9a9c644270aa00e969f811b6cf"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.15.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.16.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.16.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.16.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "1be2d13a1256429d28b3294c4d94cf14"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "62f2913b05acb718ed1632b043cd299d"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.16.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "a0d4f4b1c69bbaf0b945cfb7e24c5f0d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.16.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "8b0496751189d06bcb09dc93b2cdfa18"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.16.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.17.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.17.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.17.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "28449c374e5b0112dcec0039c65a2da4"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "fdd75774fd1dd316eebb728424e6582b"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.17.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "e792edb4693a3797d3dbd321e34edc93"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.17.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "55cdebbe031e9a038b2f4ac15095a554"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.17.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.18.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.18.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.18.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "c98b63f62cbf058c2e2614ff592a241c"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "a0d61627ed864bcccbc2f7e9476ea4ed"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.18.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "5497deb6d47fcf900567b190e66d8635"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.18.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "ac51832091092bb06f38b4a93d189c2f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.18.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.19.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.19.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.19.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "fafa0b88ba9041d96b889a2e11240fc6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "e8460ae3840a171387ceb2a0ba408434"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.19.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "d4f82454b023c30c682459c321510125"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.19.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "bf016bb75ed2adbd7465f9a581af78ca"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.19.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.20.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.20.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.20.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "dc9f12206fee09fdda705c7f5ca48d31"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "b93b5e7bc89ac4f1865b9ff7f2962a5e"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.20.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "ff720d5a40509556cfb6f7cc265a86e5"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.20.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "a56acdfc6ac05f89ef92e6e76c7e5e56"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.20.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.21.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.21.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.21.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "1a98106be8c67ca6e81f60734fc7828f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "0072c307e38b1d9b1a540a9e783ee063"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.21.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "1318eb0ebaf36823959598d23d43de7e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.21.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "d26c2e179b658530d9a17379c1c9987d"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.21.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.22.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.22.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.22.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "e1802f9d6d19d317d9b12de13ab6dd1b"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "5e2e6062cacb3e83133f96cd2cc88715"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.22.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "b5fec625999a5637bd8362448162e620"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.22.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "f0aec2278df22d15ce0cd31aa5cb06fb"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.22.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.23.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.23.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.23.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "ab50dbb1d774a03ef3b9bd45a1f27576"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "d3441884a61a1d9ef456c4d37e01e519"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.23.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "270f2fca80123e3c2e03dafb4d11945d"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.23.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "74a8ff695a17ed0c5bdca287e3a9f69d"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.23.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.24.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.24.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.24.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "495aa2d785f69996a95d601383696e0d"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "e37b643bca4a30f6523a706310e92570"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.24.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "fad9bef60135f36949d730f44b2f88ea"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.24.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "85224dab3597f298836b4089343c3119"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.24.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.25.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.25.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.25.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "3eaf56b119050efac9e8f15ba69b8122"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "e11d8cb9d5680883bf6e28de0413d6fd"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.25.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "e1767425d3e359642a15db736f843d53"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.25.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "d421d28b021fc111a859140e195fc585"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 30680064,
"records": [
{
"name": "model.layers.25.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.26.self_attn.q_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11542528
},
{
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.weight",
"shape": [
576,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 24125440
},
{
"name": "model.layers.26.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26484736
},
{
"name": "model.layers.26.self_attn.kv_b_proj.weight",
"shape": [
4096,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 26485760
}
],
"md5sum": "00aa0d9b27e135e66991d5df5e59fd7c"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 31719424,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 8388608
},
{
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.weight",
"shape": [
5632,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8650752
}
],
"md5sum": "bd9499d319f006f5994ac1b2824ae015"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 738197504,
"records": [
{
"name": "model.layers.26.mlp.moe_gate_up_proj.weight",
"shape": [
64,
2816,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 738197504,
"byteOffset": 0
}
],
"md5sum": "1c2686eaa97ce37a79a2c7e62f13da8e"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 369098752,
"records": [
{
"name": "model.layers.26.mlp.moe_down_proj.weight",
"shape": [
64,
2048,
1408
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 369098752,
"byteOffset": 0
}
],
"md5sum": "6d98de740a18641d2e2da76b56cdcc88"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 11542528,
"records": [
{
"name": "model.layers.26.mlp.shared_experts.down_proj.weight",
"shape": [
2048,
2816
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
}
],
"md5sum": "a4a21384a05a39109863995203cef2a0"
}
]
}