|
{ |
|
"metadata": { |
|
"ParamSize": 325, |
|
"ParamBytes": 3048549376.0, |
|
"BitsPerParam": 3.619307029695688 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52736000, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
412, |
|
32000 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52736000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07f8e55ee35dd00e8f86c07782831d59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c8924c4b1b18cb8c2c3a6317e9311b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "33efdffc1b6af95dbb3c7662daf303c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31492608, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
103, |
|
32000 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6592000, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 6592000 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 6600192 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 24688128 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 26949120 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 31484416 |
|
} |
|
], |
|
"md5sum": "014baaf8004e09d80979644eb5ec1c54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c338611843ea40c62cb24b6e4c93cbb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "8da9f9a4295e2d3f5a78aa0e84653166" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "ebff3a5b74713ed1c1598e05db64588f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bbd4cf405a5bc7bf3dc59196a10111d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ede45b002a11271404cb56003536786f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "b93fee99e544bea6ae287d733ebd01d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32a08f29bb221aece7e6d5af581be687" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "b89dfbc15b98c44fb1d0b8c3528bd92d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "b0f904587eb07e55f953e91cda01f522" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ac9441c2d7a71eb10362af76fc21f45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fedd6aa7492daa7b84d3127cd5279e60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "e47108f54248fcf890830ab17325cf10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0072616dee633be7e0353a1ec666338" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "3c3feb0be5448e5de8a785c63bb07275" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "dade4b0908536a6fdccbfa2bc0d7d4f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "11d7e812a942b26379d13516250afe92" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc69fff619c6d4176ef39b17ba5601ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "f3f086b103d81adec7f7ccb3a5a82b34" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "40ee8443d3a9c5ca5ba0528d19e0d735" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "6089df8d8ea3e30dda7faca94fd6a7f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "c8f4c86e79a99b6c27598c1a293b4203" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52736000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
32000, |
|
412 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52736000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1ddcaea2b2921ddbe501527c197dd71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32290304, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
32000, |
|
103 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6592000, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14194176 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 14202368 |
|
} |
|
], |
|
"md5sum": "6a9299266ad2749026b9807da9fc79dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e176568fb4600bf8e2ae1171492c732" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29586432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 6796288 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 6804480 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 27055104 |
|
} |
|
], |
|
"md5sum": "cb5226c2789aa3b257c4c6c282ff74f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf6dda7feef58c9ebd891ce80e70696e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6170f936c8fdb4d929d697cd2fe5f0bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "cb19c222e49ceb3d605f18e0dddf8c88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "997cd72ab22e9e4c006ff36b4b535f89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "41fc28efcf19bc0a4a957a4380cbc7b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "a58ef4bd17bf66d27d834579ec046177" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7485fb56ad430d6dae9aeef0077691b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8eafaf364bf582016c90b99f5f64d03" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "4b31bad9b66b474a98a01bf6fbd2ea7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b49dd2ead668760eaba10a2550a62eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "37d6b78521b049226a2380cbaa3d12d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "17752870d30585ab75924279a5391832" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a25c3c068f14c0f4a921a9eef3baa3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b399b8c263550a0477c24784f8fc90a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "29fb0d329829424c32f2c6e4cc96343c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e2dd5062672a1f21c5ecdfb850b932d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "a94778c911505dd60d440fb5fa77e8c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "aba8ad42e51ce1820f5ac17404bc6230" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0bcdb8d6420f8cad7da4ee1f4e19d123" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02bfa3e425b0a24930624a54c5dd2bee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "cbe33aed3e352e75518ec0e2ec71887f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0640e445724c7d470b17fe6e3d2ab51e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "b968a802eb13f012a8ac1daddd4b462c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "334a6dfe5f04de64cd375266cb2e1028" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "671348351c8d3facaac89c7484fe67d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "059bc02c6f588a8eb3fb0d3a5df6823a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "7d59a720274bf7f3d2c0eb4c17c4d643" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ade31c6ff5fe904c3d8f16c36c19d50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "82a3cca5de13afdc2e99d944a6a37d94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "ac862c2f9287918adbf697ae3165b0a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8aacd7df5d3591a37bf68a9107e3c9bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "913c0a41d8d09a7dc84f63931b85c2d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "b342f3390e81dc2d6ba4ee6f27aedf77" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e7e99aef4f64ae58475f8716ee278b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "c66da7d9d01dc75b10cc91988b5be9a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "2b0373dbce085c407c71cb8e81b94b56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e46d6ad6badc60a785880df8e1ca7f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "554ec4304520e245f1bf638b7b2bd742" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "87ab2571248d7e00a9b3e7b34184cf6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0feafaa40f87d81befa4a3170da607ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "9bbb03718be11e47ec709a6479bc7379" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "ee095c22c5701beef624544e384c9002" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b28aff858a0cf03441dd7136f93e5a35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78091e2424e35bb47df4bd08ba92fa6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "4a26feed4c8b74e73ba7d8a3393661ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "035a01e1016360fbc79ad49793c9489a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "274c758862ce360905927fd2724e31a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "925a02a7ff20c5f5f79ed6ed8181e296" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7c9d6604f23e96e464f2a63a4df91a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c297ba633348549bc8684f5b5261ec57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "f0448c76fffb6a19587f0af01c4ba60e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a57f89a172332fc8fb7c177c8bd5e0ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "94dee99ac1ad914c4edbe86c87b5b1b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "dc02e4323568bb8159364b46f0a0c490" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a05c11d9bd4a3a39fd47c3fc5d0294f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fc34b2bd472b29ba356d948aebbadbf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "e0c29c82d04fe7a8caf23d2ee849a106" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4bacb5c87ad3fda5faf5e1ba6a9eb053" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "2fdc8c38070362b0fc8c399d3e2dc6af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "5588b35b272d254821d56587b9bf1e08" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "196be6a9cb1621c3b17f7564e6261c91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26f3b40ee4efbf77a3ee80ce7b45d766" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "935a62e62c30d3f4e5bf045260c590d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e49bf3469a0e1197703bc3973179e53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30482432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 10125312 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 10133504 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 28221440 |
|
} |
|
], |
|
"md5sum": "31ffd7333f847aa2946757b68902806e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27325440, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 4543488 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 24794112 |
|
} |
|
], |
|
"md5sum": "b1c53c33e63be8815d892e6b089ade7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 36282368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
412, |
|
22016 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 36282368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "004dbf4412b46538d0adabd067931d97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 20250624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
412, |
|
12288 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20250624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9a498bb9fef5c24fad84780b62037db8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32494592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 6750208 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 7593984 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1104, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18087936, |
|
"byteOffset": 7602176 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
276, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2260992, |
|
"byteOffset": 25690112 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
103, |
|
22016 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4535296, |
|
"byteOffset": 27951104 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32486400 |
|
} |
|
], |
|
"md5sum": "20f7b7d8f90fb15604cdc9c7c0d379e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 10125312, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
103, |
|
12288 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
412, |
|
4096 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6750208, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
103, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 843776, |
|
"byteOffset": 9281536 |
|
} |
|
], |
|
"md5sum": "2ecbea5a6fcb4b97713059b075dff1ac" |
|
} |
|
] |
|
} |