CharlieFRuan's picture
Initial commit
a1dcef4 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4212490240.0,
"BitsPerParam": 5.001066770081567
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65568768,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65568768,
"byteOffset": 0
}
],
"md5sum": "f85e52248bbea32b22b0980b36626192"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30748672,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8196096,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8196096
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8204288
}
],
"md5sum": "43bda03c7468d9d400fd49d0574745a4"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "dbb6f85e88c3a6cca9695f6035724db5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 28196864,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2818048
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2826240
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 2834432
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 25378816
}
],
"md5sum": "06be15bccf1c9bda0bae3f639cf41196"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30810112,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5636096
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 5644288
}
],
"md5sum": "9171f0f1e5a5c051fd0382c2912d9c05"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a951dbc3cdac7df6d7d59d3e90d73fc0"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "65d57daffea784b10f35b538a9a83267"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bfecb540df73f5d0f45e8dc6e75cc8ff"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "8fac56d965afd42d0b4dd0bbc0d215ba"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "6e60247cb4dbe69a242d33a387bd26f3"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a70f87c9e61c21167af0e920624ac157"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "470e521324fe33497ea9994a3af244fe"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5d86dadf6d55316cff7cdeac7674cf09"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fcca77b3c7ea7486e5feb30e632bb6ae"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1dc2ea571ff8a58063635cdacabc4c5d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "a39c93f23f60a2a9045ffb54f9cf5e03"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "463474766c3e71481251c276a1e4c0d4"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8b13b66604e8eb8a8524a663a12ae435"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "564d9d4e741fae401c7518fe8ab09fb7"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2b548f620bd7cfaa32f321fb1bc38d70"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "cee5f537da07377b0a06aee839a219a1"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a91cd8dbfb5caf9cce070e664c08e0c8"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "a70f57084bcecd215209da80b010ae19"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7c94ea992d73bcb766d04679b1b66f8c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "73331862db4bef89665eaf0cc82a345d"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "58a6cad2d963f4f2351265d04ce96fa9"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "69e6c0cec60c85b386334b530b0ceb87"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "23ac325136ccd715cb9d8a273c5ec2c2"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6a654b4f453f6900fbada0ec644739cd"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "c93b1a53c04c05c2dd60b61a390a0eb7"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 65568768,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65568768,
"byteOffset": 0
}
],
"md5sum": "ae9806958f35a4fddc18017adcb54c23"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 31805440,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8196096,
"byteOffset": 1056768
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9252864
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9261056
}
],
"md5sum": "1805e0729b56e4b7d602fd6ee819724d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b7aecfdd4af0b1bdd32122530f108227"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ff5934980f24c278a9f64268dc0f851e"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "52cf3cc62d6b2b51fd41efe74fc11928"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e1c46d7580e6197b56cf26cbc7b426bb"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ec571c66edc49c0a193768c623a00b62"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "689806017f010fe8a45977f7a711c4a3"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "6f2ca4c26975ff1335884c652ce5ff67"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b9a4ad9aea2161f3a18e22a84538151d"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9ef4c75f079479eea4c360652a67b63e"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "37fbb0c658ca57f6dacc079b05b2df3b"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b2fd3f71dd8f21cb26e81145701ece2f"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29827072,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 21045248
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26681344
}
],
"md5sum": "cefed4e96f83e8d2e5981d8fa3a7484a"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "c3a7e3674910ef8718173da1ca833217"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "89085373bbe49a1b167d60d42ea10206"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2b626b94329f58227f2d016e662cf696"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "9c8cfc693c781b0a0b8fecc43de4146c"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2f9a3ccc1551d96e918c45e51fb2b192"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7eb087afd1ac88cf17bf7c527e57212b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "d4945041c2b28bd91b3fdc8a16ef8d26"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "4300105f71276edaf6172a4fbc36cc74"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "54910d3f3be6c18adc08d7c7ee01e8c9"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7de4f830ac60079e10c9ebc604406913"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b9d6486d7b29d6636bd71740882623a1"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "07e6546cb5f774442b568e03be1d03dd"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3c6f8f7f5a19d770f8995073d5f9f308"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "12098c2e1498a37dc4be51911e8c63cc"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "c0466976c18f9f97b0d9986fde7635f2"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "160bc1979c8c8fa45d9c618d6efc1ab0"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c4c3f380879cf167b4d7e78b5f4d2f35"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f0b6269991d7f37049951e55b6ed2b91"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9b5e941c820017927e1f221dcbe41004"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "058b675143f655dc2647bc822f82c5d9"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "5f62d3a85675bf698e97b790b215f417"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "ba11089c71a3ebe721c9c0151a31851b"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2380b55d2043529eee74f0c9fe95cf59"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2c5039ac9a48f41f2b437d51850a3c85"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7ae37dde2ac65c5b110f1822b9df2961"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e193235eba60e70ad7e968ed2e969ed8"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "068b912fa6d7871cad9061b0824788da"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "05e3a36e843241896d1dee85646d3027"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "6ea4228b1dc3543ed59df250c9610ed4"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "bcdda4511cb8ed0a99c973c02900ee6a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 28196864,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2818048
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2826240
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 2834432
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 25378816
}
],
"md5sum": "eda79c0093b611513c9188407d1db223"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 30810112,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5636096
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 5644288
}
],
"md5sum": "505a1eb72e6c55653a771df059c9f5e9"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8dccfe1381d54d755832d69867b678ac"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fab94610f2a5f1ec915c09cba90e9f2b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "de8ee1ba3bc2491501dcfb8c17084fa7"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "d0c75d6ba9d0f9bd28266f6102b168da"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7619401648db4fc407bd8e232b4a2a21"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d3849b67441e370178272533cc5c4513"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "0a74acd70a9357fb629236ab787cf056"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "03ed8ea475eece58d7770c306f5fd530"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "559f9f6e313a72fd7d63d0391e54f510"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "90243f46167214da48b90ca60896ff19"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "4d91ee3a87fabcd7b24feabee9518029"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "0dbbfde2420851cc4bed86d67ed31773"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "99db738659f01ec30d9605b6b6d5b128"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "2df9139b2a11edbf6cb0eebf0ad52108"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "628efe4f52b4bde4318f35053d889cce"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "42f78bd94074e3f6b3b8ffc043eb09d9"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b3fd0193e07eb1add788f9082e148f35"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "b4e04e902312643cb7862ff50ad6b66c"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7eacd4e7338671417596fcb59202d272"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0e97915b1470363b79fd210d43fc8ba1"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "5ce369f17224dca75d9cb0e4a5990bf8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "0271a2d2ba716727b8b87a8a0c98f085"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "685f3cf5a494bddc7d34562c6947603c"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d0392528e31a00d0854e7b3754184476"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "8e66810352103772c88eb05039c08897"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b1dc968194e74582e1c49cbfd125927f"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "814bf460a614341b3d859a3b93497f9a"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "e28f0db679a13d6eae3d6d1acec5d924"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a518f8e4244fe0d8ccc0c8740d4f61d9"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4c0ad39c7b643e1ed7f9282531478469"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a20c350842aa062f87f767b54327bfe3"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "9a22d89967b23c1351168a0276b748bc"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "734f62e45f6b285dbe2a61faa9d688c1"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c9056eed6284eb476df94655b83ae459"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "cdf1957b3485bd200c5f3c2fcbf65e63"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c7e7bca10d71242ecf82aa4cbfebf45f"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1734f11cb82f44c482f1507d9ef98770"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 30801920,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 18219008
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21364736
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29753344
}
],
"md5sum": "6ee8c0dfe33b437ecf48f88d1b087d0b"
}
]
}