llama2_q3f160 / ndarray-cache.json
ruihanglai's picture
init
a523fc2
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3048549376.0,
"BitsPerParam": 3.619307029695688
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
412,
32000
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "07f8e55ee35dd00e8f86c07782831d59"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "8c8924c4b1b18cb8c2c3a6317e9311b7"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "33efdffc1b6af95dbb3c7662daf303c6"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31492608,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
103,
32000
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6592000,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6592000
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 6600192
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 24688128
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 26949120
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 31484416
}
],
"md5sum": "014baaf8004e09d80979644eb5ec1c54"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c338611843ea40c62cb24b6e4c93cbb6"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "8da9f9a4295e2d3f5a78aa0e84653166"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ebff3a5b74713ed1c1598e05db64588f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "bbd4cf405a5bc7bf3dc59196a10111d3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "ede45b002a11271404cb56003536786f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "b93fee99e544bea6ae287d733ebd01d3"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "32a08f29bb221aece7e6d5af581be687"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "b89dfbc15b98c44fb1d0b8c3528bd92d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "b0f904587eb07e55f953e91cda01f522"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9ac9441c2d7a71eb10362af76fc21f45"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "fedd6aa7492daa7b84d3127cd5279e60"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "e47108f54248fcf890830ab17325cf10"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f0072616dee633be7e0353a1ec666338"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "3c3feb0be5448e5de8a785c63bb07275"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "dade4b0908536a6fdccbfa2bc0d7d4f9"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "11d7e812a942b26379d13516250afe92"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "bc69fff619c6d4176ef39b17ba5601ef"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "f3f086b103d81adec7f7ccb3a5a82b34"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "40ee8443d3a9c5ca5ba0528d19e0d735"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "6089df8d8ea3e30dda7faca94fd6a7f2"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "c8f4c86e79a99b6c27598c1a293b4203"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "d1ddcaea2b2921ddbe501527c197dd71"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32290304,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6592000,
"byteOffset": 7602176
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14194176
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 14202368
}
],
"md5sum": "6a9299266ad2749026b9807da9fc79dd"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0e176568fb4600bf8e2ae1171492c732"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29586432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 2260992
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6796288
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 6804480
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 27055104
}
],
"md5sum": "cb5226c2789aa3b257c4c6c282ff74f8"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "cf6dda7feef58c9ebd891ce80e70696e"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "6170f936c8fdb4d929d697cd2fe5f0bc"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "cb19c222e49ceb3d605f18e0dddf8c88"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "997cd72ab22e9e4c006ff36b4b535f89"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "41fc28efcf19bc0a4a957a4380cbc7b7"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "a58ef4bd17bf66d27d834579ec046177"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7485fb56ad430d6dae9aeef0077691b5"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "f8eafaf364bf582016c90b99f5f64d03"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "4b31bad9b66b474a98a01bf6fbd2ea7d"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0b49dd2ead668760eaba10a2550a62eb"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "37d6b78521b049226a2380cbaa3d12d6"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "17752870d30585ab75924279a5391832"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "3a25c3c068f14c0f4a921a9eef3baa3c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "b399b8c263550a0477c24784f8fc90a0"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "29fb0d329829424c32f2c6e4cc96343c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7e2dd5062672a1f21c5ecdfb850b932d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "a94778c911505dd60d440fb5fa77e8c6"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "aba8ad42e51ce1820f5ac17404bc6230"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0bcdb8d6420f8cad7da4ee1f4e19d123"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "02bfa3e425b0a24930624a54c5dd2bee"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "cbe33aed3e352e75518ec0e2ec71887f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0640e445724c7d470b17fe6e3d2ab51e"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "b968a802eb13f012a8ac1daddd4b462c"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "334a6dfe5f04de64cd375266cb2e1028"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "671348351c8d3facaac89c7484fe67d0"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "059bc02c6f588a8eb3fb0d3a5df6823a"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "7d59a720274bf7f3d2c0eb4c17c4d643"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "5ade31c6ff5fe904c3d8f16c36c19d50"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "82a3cca5de13afdc2e99d944a6a37d94"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ac862c2f9287918adbf697ae3165b0a0"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "8aacd7df5d3591a37bf68a9107e3c9bc"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "913c0a41d8d09a7dc84f63931b85c2d9"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "b342f3390e81dc2d6ba4ee6f27aedf77"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0e7e99aef4f64ae58475f8716ee278b8"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "c66da7d9d01dc75b10cc91988b5be9a9"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "2b0373dbce085c407c71cb8e81b94b56"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "2e46d6ad6badc60a785880df8e1ca7f2"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "554ec4304520e245f1bf638b7b2bd742"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "87ab2571248d7e00a9b3e7b34184cf6b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0feafaa40f87d81befa4a3170da607ca"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "9bbb03718be11e47ec709a6479bc7379"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ee095c22c5701beef624544e384c9002"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "b28aff858a0cf03441dd7136f93e5a35"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "78091e2424e35bb47df4bd08ba92fa6b"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "4a26feed4c8b74e73ba7d8a3393661ec"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "035a01e1016360fbc79ad49793c9489a"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "274c758862ce360905927fd2724e31a1"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "925a02a7ff20c5f5f79ed6ed8181e296"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "e7c9d6604f23e96e464f2a63a4df91a1"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "c297ba633348549bc8684f5b5261ec57"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "f0448c76fffb6a19587f0af01c4ba60e"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a57f89a172332fc8fb7c177c8bd5e0ed"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "94dee99ac1ad914c4edbe86c87b5b1b7"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "dc02e4323568bb8159364b46f0a0c490"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a05c11d9bd4a3a39fd47c3fc5d0294f8"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "8fc34b2bd472b29ba356d948aebbadbf"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "e0c29c82d04fe7a8caf23d2ee849a106"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "4bacb5c87ad3fda5faf5e1ba6a9eb053"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "2fdc8c38070362b0fc8c399d3e2dc6af"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "5588b35b272d254821d56587b9bf1e08"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "196be6a9cb1621c3b17f7564e6261c91"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "26f3b40ee4efbf77a3ee80ce7b45d766"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "935a62e62c30d3f4e5bf045260c590d8"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "8e49bf3469a0e1197703bc3973179e53"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "31ffd7333f847aa2946757b68902806e"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "b1c53c33e63be8815d892e6b089ade7b"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
412,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "004dbf4412b46538d0adabd067931d97"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
412,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "9a498bb9fef5c24fad84780b62037db8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1104,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
276,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
103,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "20f7b7d8f90fb15604cdc9c7c0d379e2"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 10125312,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
103,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
412,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
103,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
}
],
"md5sum": "2ecbea5a6fcb4b97713059b075dff1ac"
}
]
}