gemma-2-9b-it-q4f16_1-MLC / ndarray-cache.json
CharlieFRuan's picture
Upload folder using huggingface_hub
032da60 verified
{
"metadata": {
"ParamSize": 507,
"ParamBytes": 5199330304.0,
"BitsPerParam": 4.50075370326778
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 458752000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256000,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 458752000,
"byteOffset": 0
}
],
"md5sum": "d3b171d3cc512e5ec59a53628257ac71"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 57344000,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256000,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344000,
"byteOffset": 0
}
],
"md5sum": "28b93f5bfd51a8afbf5366f35963ee51"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f8a46c2ab2f7e71e1ef9c3ee371316d6"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 28908544,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 7168
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 25697280
}
],
"md5sum": "06036c78d4b9e3f2f6fcebd91e66e860"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "906f39752d61ec260747e6e16f1ce516"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.0.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.0.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "f987af1628d2e8dd52ea49bee47142d3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "5e4fa5f7acd1e136d95d60294272c8b5"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.1.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.1.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "a3a6c6757cc271f4ff5db6640b043a91"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "e31e28081c7422029c6a6aec5f0ce483"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "8c60dfaa73d934a25bcae3938535df1e"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "97ff90e543ac8a1bfab6d44d9ff23f27"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.2.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.2.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "aff72d1284b4c7065984ade880bd9cac"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d8ac9eb2c5493d91e74ea42eea78c078"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.3.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.3.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "1c0e925f03d868fc46b1cdfd0568dcba"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "45463833779de216919efb9264e9f7f1"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "5e66c7d94da805e2345093f8114f19d1"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "916f258877a6cc5bd9fd7661462864cb"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.4.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.4.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "b7284c5d4df97397ad1d5f12e7ef03fc"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "a2c3c18f75ec221fdc28b9a0aa7028c5"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.5.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.5.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "e3e0a784f02c0818d7c0bfb7f36b665a"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "9e993542e404a7c3576c31aca4f14eff"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "3d118f8be4bdf3428dd5f22bfa641f6e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "9fe204608b23b6e4eba1dc04c7116f7a"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 31216640,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.6.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.6.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
}
],
"md5sum": "36c50e7f9d2a1d7478b0c96fa571e16e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "95c10e1a2f56ada9c471403abc4d4ee6"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 31202304,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6422528
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21102592
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22937600
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30277632
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31195136
}
],
"md5sum": "d64edb751390dc0d533fbeb84197dd05"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "371bb5399580fbdca1ea1577a3e0e72b"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.10.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.10.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "4b481061984454075edd38a86cf46a15"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f2ef218d512787aeadce8eae083d9b33"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "d03bcf2a4554b2fa61f63c4c78d43b70"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "aea95b85aaa20537d9f0d944ee1e2b80"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.11.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.11.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "add7fe71148bbbfbd26e10f0a81ee92e"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "23ecbaba6376b2193050397059394570"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.12.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.12.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "e0c4e8bdf848f0b87a8697291ef015b6"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "163a1e2df358cee0665cb9e57a5a6c61"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "f4da1a1a242f54d8a38e1b55ca9ea5fb"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "92c00541df0147d61d45dc2986ea6190"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.13.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.13.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "b522850d5fd5f673dffbc05a00451485"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "9fb1cc4fd7a00ada55cbbb560e7c65b0"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.14.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.14.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "4e7738e6ee7251870316de734d167689"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "cd12e921b48cbd688b07bc54709b8b83"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "a56c7b4369ad2d832adc91500e592b42"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "20e5692af90110900ebbfb489310b665"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.15.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.15.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "f74e0f38b2183b3ef60d21d0fe3ae93a"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "6594ad3a46309e8aebd9ccf79a7a957a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.16.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.16.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "39e9e89a08cdcbfa9a3518906e0cc9ca"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "720b891234d897021909fbc3d141b646"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "8d335eb31d1e8a728f8fe9163fcab066"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "2b439e42ad5a6f4839727e583eb8df49"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.17.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.17.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "f6827e6f4d875afdbd35af1e4f629412"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "6c7939a4c25f05a2cc5be0234a061f7b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.18.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.18.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "2fa35aef227ca48432986d5507bd681c"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "2f8336b59644b695ee87ef143b79b9fe"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "fb417330890a2437a72a942cd86c16ba"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 31216640,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.19.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.19.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
}
],
"md5sum": "ec0aa90efc130755daa6789557d268cf"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e7eb383c4dabd9be1b93bbb5efb0e2b4"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "9dd8e67f22b2ceb296e85a9aa232b55c"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "4cf90ce687b4fb3d4ea7b3f90e4023fc"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 31230976,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 14680064
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 16515072
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 23855104
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 24772608
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 24779776
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 27991040
},
{
"name": "model.layers.7.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 27998208
},
{
"name": "model.layers.7.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28005376
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28012544
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 28019712
}
],
"md5sum": "6d80206aa980136c92ebbd3b262570fb"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "29c0645b7e6f5ab8c46e6a8f901967b0"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.8.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.8.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "2b2462c1a81cb85a9d36e8030abe202f"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "eafdf22af0bfe6075a1369c6764c87a1"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.9.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.9.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "d3973c5e603a0a9bafa636121f309d53"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "f08a1485be3cf283a4dbbb5d1d48d26b"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "0ed5c6afaddea52265cba1f91185973c"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 32141312,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.20.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.20.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6444032
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 6451200
}
],
"md5sum": "ad6db960abca0cacff77cc0aca545478"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "99d4e9909ede3f88d38b423008738621"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.21.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.21.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "bb28c4e79b1318b64b64d39e8325a48b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "5bbdaa043e1af8b5ac85678d1703a758"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "2d1bda4fb7ad4d3620c80cd4df5ce2bc"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "06299f3292e09893a5b11f687b3e16eb"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.22.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.22.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "3c1337273e79bc89972d3c4c9e786f7c"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "54baf2c8857cad9087a4ee505675722e"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.23.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.23.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "cf6e62a73a37911c689aa0a11fa74c27"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "bbee19f4ebfe7ef89a5b8db5dc114466"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "ac63c108a0f62a35949c3a826ef3a1d0"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "bcb127c6ffd065f3f1ea7cfa80354c19"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.24.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.24.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "94d6c1c7b049097d7a4c19a948af97a3"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "ed4c6b924526344a508b75bc127a7574"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.25.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.25.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "2b92f0f769204ff891a5394a1e0369c1"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "1fff2398d02b8d70f1b44e87c932e3de"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "c9f03e2a737f75b71b62638dd66250b2"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "0e5046e5521330eb7659c33c011e9063"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.26.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.26.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "60e82d5a37b635728134cb1bd5e39832"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "b0427232287f0265be39c651c51ed2b9"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.27.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.27.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "d8021aeeb4ec902d0f0878bad635b3a6"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "dc53d7ea9dc19e96b978fb41197ff277"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "1348b2d309d7aa35e19c1dc98f2eef51"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "22689ade5eca450339ae62c089e32b64"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.28.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.28.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "0e5a68b92b3c95cf22c3ffd1c2b661fc"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "c116da98bfc9e6c0a7c44bcf11a81f4b"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.29.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.29.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "80f7787cc7c3b2be9ac6c51c0057445c"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "7b3a167159d581b19e98ea9708385e7e"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "af6d35c77d262d674c1ddf12a638863f"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "5ca9431e01052921e3ec7b48cbcc2ece"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.30.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.30.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "c0de772e7dd7077421739304c6af9cfb"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "4b993c99435db63e2bff9fe008bc771a"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.31.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.31.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "55a8e8c7ff466fe850c34ffe70a470fd"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "2c5610c50e749b9d29d74e306e8222a7"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e66305c5365d18b9fa74b4b1a20a2c55"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 32119808,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 917504
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 7340032
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22020096
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23855104
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 31195136
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 32112640
}
],
"md5sum": "27618554ec3aaca397c0b47f6e623dcd"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "3ab5ef61a55efd0279d751404adf19c2"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 32141312,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3211264
},
{
"name": "model.layers.32.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3218432
},
{
"name": "model.layers.32.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3225600
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3232768
},
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 3239936
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 28930048
}
],
"md5sum": "27cdc7335ce7f16a694587a714da39b8"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "ae7e19107b03a6dba44fe2b45c39291c"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.33.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.33.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "8e5c98e7aa4e25db4753e4097d49bb63"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d1d3fccecaa5c5368e6467d61ba7b179"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.34.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.34.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "c4654685a935251adec5881d289f5cbb"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "0e5b9c2e1d526a24f045688190e5a4e6"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "2f1b4011c4966e3f10b5a9eb8f4665df"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "3f4228324f40ffab79f9748876e90f14"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.35.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.35.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "00b402a90cd0c5a76c26b9dd89871d8d"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "3303e8d257006abaed990487f45ea8aa"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.36.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.36.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "e4dcba05ac763bcf324f6ca49a2a046e"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "15dd5845e2247f0447bb1011c956d36e"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "1770cbf77aa14a70eaba2809d1fb230c"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "1b7da1fc51b78ac2f9e62463a7a3a76b"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.37.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.37.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "dd3919b05cef8d32ee7f42e9a1ed8806"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "0240ec08aef9c8254cbe8040c09d6c99"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.38.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.38.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "26326ddfd830ead502227194b2b7915a"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "0298d1c5f1741eb12aa7539b49e5d822"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "38d6304590515cb5e7aa5f7f9ab2835e"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "2ae007d7eb21a12bbc197f30da749d0d"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.39.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.39.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "c98f7316c5c4be975be68af20fc4350a"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "b357078392d4b5772e38a1eb8b65ffc6"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 33510400,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 0
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 3211264
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9633792
},
{
"name": "model.layers.40.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9640960
},
{
"name": "model.layers.40.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9648128
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9655296
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24335360
},
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 26170368
}
],
"md5sum": "0ee17e53775e75626769947781012409"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 51380224,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
28672,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 51380224,
"byteOffset": 0
}
],
"md5sum": "d4906be588326782050c14064f7d5241"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 29826048,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 917504
},
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 924672
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
3584,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3211264,
"byteOffset": 26614784
}
],
"md5sum": "10321cd25d0e2edf534c1eb6925c4006"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 31223808,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
28672,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6422528
},
{
"name": "model.layers.41.post_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6429696
},
{
"name": "model.layers.41.pre_feedforward_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6436864
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
8192,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 6444032
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
8192,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21124096
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
3584,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22959104
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
3584,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 30299136
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 31216640
}
],
"md5sum": "3b60bd678385798fbc2981a86c32ec64"
}
]
}