|
{ |
|
"metadata": { |
|
"ParamSize": 324, |
|
"ParamBytes": 31412968448.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 419430400, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
102400, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 419430400, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8efb052ccaeb3fe0dbaea6f9c829b02e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 419430400, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
102400, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 419430400, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a8627915485ce09d766748c2eaf73cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 89653248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
21888, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 89653248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7dd211cf2b4ba021dc88957da62efa8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 44826624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
10944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 44826624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1635cdc76da038d13d988f8ace5fdad5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27538432, |
|
"records": [ |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 4096 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 12587008 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 14946304 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 14947328 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 19141632 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 27530240 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 27534336 |
|
} |
|
], |
|
"md5sum": "94604460dd83a6fb5270a48f573f6752" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d2cfb5622ce34e22e5ae4957ee157de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27788288, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 14942208 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 14943232 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 19137536 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 27526144 |
|
} |
|
], |
|
"md5sum": "62b69b1ec7025b08abe78b71605dcef0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d385052823dda4d013a321974973bc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7879f962cef2ab4a381f9661261610da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "ae252989f064e2d6262194cda9d72ae7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "9e753070cd9932604081fd8d4f8d6bb7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed63f9bddb6b846adb976f22f6849c57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e02769b1751a12b2d6fe0ec659e411e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "4c9a575ac29f0912d0e8ffdebbd0a0a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "ac951ff044083478e8003210422bb563" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af3e86be02579b3ebd5d5bb2c29363dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "532d5d8699b182ab3946c316e2d5263b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "6181ef48c120f471179c405ecea2d745" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "9be38bc3c7592a96b3a9e2db17f41194" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbea4f36bbacd5f21d6ba0b21e94ad3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77dd723ae26c53f97e89686ab1a78619" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "e06ba2c520e9e8e88e9281a0f3e8d36c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "a95303fe48fb1ac354f0b892f72442ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2183838b88d8e2dbf42fd5e66a85cc13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "765277f302d8375301976b81a0190f56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "f5ac7d4e7504426086402bb9e56e4f2d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "f03fed09dbb39c9cc027d956a073238b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b71f705ecf9afb9d9a20c160ed01309" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5eae4096c3981cec9178b6439b6af1c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "50e2ea4e7b468d3a097a32eed4eb9640" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "de7ac6b69157d9e17fbd0f154f1be493" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3bad9a2d2bb3c12ccb9fb8e808ff753" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5aa5f6beb538b72b11a361cff10f5218" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "61a8df6e9ff40b668dc98854175a1905" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "bde648537a1cab7e836ce4b10abf976a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91fcb48dc0b555b2377fef861deae02a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60201e91d1e9379301be2b0848e22ebd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "162b5f3e8940473be617c3931a32b13a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "433d0b07f9c2918a6c9ca699783811f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a57e4347113d8dd6c121e8b2338a11c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1e598f998ce420c612a30ef5b91e3ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "ead6c9e151f9070da55594a35e1fdf28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "64d1f10447145207fa6d5558be3d81f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f7679c3c359dff9b6b3664d056853a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a19d8185410a2952c77f81499606cf1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "7682e0bf1ceedf464967c6775d0a6440" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "7d055e48551b487086938e176cfd5933" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7b76dc145f8beaca07dbf23adf94db5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa6952d761db9bb571ba5b5ac0e26d09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "64df32b84faa940dc5f062df9b0816e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "97d842c41752052c5f22f39a2bce18c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "920fed6b8d41bcf759c27b202bc20779" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8593dd181fec8ae648689db895ea6bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "76022278d2c3ae428e6aa302a66268f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "385c7e55d460c8958465b8b6ce74555f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6df68257505e91417cc9b5363fa8f6db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c35f40ea0a1a49170c2956a1c7daf0ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "7d1cacd90f7fa4807bfd98bae5cac89f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "021f29d8ab1ad91e84c1fb23a09c97e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b6571ad7b66464514a0092c4935d33b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38fc228fe8fb956b00c9f7a4cd3d0763" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "172abfa0a744c2c47066e3c287a5e181" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "cd644fc4266fcb1cb8ee6641c40ada1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "024de581071a63e743015499c6e939c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "872beb9a9c644270aa00e969f811b6cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "1be2d13a1256429d28b3294c4d94cf14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "62f2913b05acb718ed1632b043cd299d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a0d4f4b1c69bbaf0b945cfb7e24c5f0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b0496751189d06bcb09dc93b2cdfa18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "28449c374e5b0112dcec0039c65a2da4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "fdd75774fd1dd316eebb728424e6582b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e792edb4693a3797d3dbd321e34edc93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "55cdebbe031e9a038b2f4ac15095a554" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "c98b63f62cbf058c2e2614ff592a241c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "a0d61627ed864bcccbc2f7e9476ea4ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5497deb6d47fcf900567b190e66d8635" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac51832091092bb06f38b4a93d189c2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "fafa0b88ba9041d96b889a2e11240fc6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "e8460ae3840a171387ceb2a0ba408434" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4f82454b023c30c682459c321510125" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf016bb75ed2adbd7465f9a581af78ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "dc9f12206fee09fdda705c7f5ca48d31" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "b93b5e7bc89ac4f1865b9ff7f2962a5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff720d5a40509556cfb6f7cc265a86e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a56acdfc6ac05f89ef92e6e76c7e5e56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "1a98106be8c67ca6e81f60734fc7828f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "0072c307e38b1d9b1a540a9e783ee063" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1318eb0ebaf36823959598d23d43de7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d26c2e179b658530d9a17379c1c9987d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "e1802f9d6d19d317d9b12de13ab6dd1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "5e2e6062cacb3e83133f96cd2cc88715" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5fec625999a5637bd8362448162e620" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0aec2278df22d15ce0cd31aa5cb06fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "ab50dbb1d774a03ef3b9bd45a1f27576" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "d3441884a61a1d9ef456c4d37e01e519" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "270f2fca80123e3c2e03dafb4d11945d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74a8ff695a17ed0c5bdca287e3a9f69d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "495aa2d785f69996a95d601383696e0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "e37b643bca4a30f6523a706310e92570" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fad9bef60135f36949d730f44b2f88ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85224dab3597f298836b4089343c3119" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "3eaf56b119050efac9e8f15ba69b8122" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "e11d8cb9d5680883bf6e28de0413d6fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1767425d3e359642a15db736f843d53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d421d28b021fc111a859140e195fc585" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30680064, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.q_proj.weight", |
|
"shape": [ |
|
3072, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.weight", |
|
"shape": [ |
|
576, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 24125440 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 26484736 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_b_proj.weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 26485760 |
|
} |
|
], |
|
"md5sum": "00aa0d9b27e135e66991d5df5e59fd7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31719424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.weight", |
|
"shape": [ |
|
5632, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8650752 |
|
} |
|
], |
|
"md5sum": "bd9499d319f006f5994ac1b2824ae015" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 738197504, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_gate_up_proj.weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 738197504, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c2686eaa97ce37a79a2c7e62f13da8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 369098752, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_down_proj.weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
1408 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 369098752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d98de740a18641d2e2da76b56cdcc88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 11542528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
2816 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
} |
|
], |
|
"md5sum": "a4a21384a05a39109863995203cef2a0" |
|
} |
|
] |
|
} |