Llama-3.1-8B-q4f32_1-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Upload folder using huggingface_hub
f4027ef verified
raw
history blame
147 kB
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 5019811840.0,
"BitsPerParam": 5.000895173865207
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "c289832458fade48271f3b2b6845cdc7"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3e98ea4309cdc7d2d4831bb0aa3d08ad"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32841728,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128256,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 32833536,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32833536
}
],
"md5sum": "25cb57ca8305905981c741b80c50001e"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "67ff68c9dcc744f27f113af5a6e8736d"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32833536,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
128256,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 32833536,
"byteOffset": 0
}
],
"md5sum": "d16f14a8eee304f4e15f9494bfc7b7de"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33054720,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3686400
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 3694592
}
],
"md5sum": "f8cf2847f857b0d7d480a25c257f5176"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a9599a70aa592590c2d16d1b7cfad304"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "6739d73fd438192dd19313d830aa5710"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "91542ec74f8cd739da2a2a598893f384"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f3d8afef97d9d7f560cc1c17ecafe938"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "26baaf298c866eeab2d3250a6b5cc3e4"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c14e6b6ed7ad0425a97f2c4bde4c4d0"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f4af0e406a8becf1d60d57a77abf2299"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "623306bb726f3c7c9b21f3b34dc058ab"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3d93a45407865a84290b41553a8639ab"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "38456870cae5b154a07f6cb520f7dbf0"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "28dbd747e16220a2416f13a6a7679e6d"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "305fff66859d72c2506f6f546dcbba48"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "1605458575ce88e52698064d30a6117e"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "574642eee9addc19c8fedb7b3ed45376"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "f2a90623ce62434af92ea7329424a48a"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "37a4393158d690e1329b847bbe0a6c92"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "713fd6367bb891ea671bd3d68ca24f28"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "28c4f82d91955b45edd840738978e985"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c8b63b416303357c679641e6f460fce5"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3f2a916e3cab106abae1b11bd0d02fe3"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "313c126b1d6fe867e031a5b01aeeb7f9"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0aec31f62e2073b2bfa9738924c8a631"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d47a92e05f033a59b683986cd67d0dc9"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "1ee12c221fcc0c82b0e59173ad95609f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7748c714bef73242c48033c89fde6325"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "3676e0adf61110e8ff5b5047ed5ed780"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f5e382e9b1cb409cf8bd9a9a95a3133e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "b6370887db10abcd8ea32d9b7e1ebd84"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b9a6ce86b5cb5dbed3502060cb407ae5"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1c907ab77ab35ebb9cdfb0bed1116919"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "4c43a1365b1489e0b06b9a81869ff018"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "72d8f04245eabc96212192f1d7bbf1ff"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4b033a0afcdfef8bd9d8addcd4074deb"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "fcf8b0fb833dd8253a9c699330dd307c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "218859fad1739556f3920f60143d5d8e"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "660136aa8b9c59d9b216365bb1722db3"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "6ddecdf1d783938bebea56b047f4233b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6ba0469e13281bf06e36ffdd45c6f94e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "c653458f02858022c5a9472fb7c7629c"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a1495b5912fd17c31769d74aafbd2e3f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "4b80e0f2adee554b5a27b2406d87ba04"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "75b02b7008c4afe90f43862c4a41889e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "13761e89b7ce81ef45776000a15ecc62"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "1f7b2786b1a1303c121ed9990393313c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fa81786970e393b4b6d22542c90ed167"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f6524be614b31de8f477e835d81c95aa"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "359bed24807dcf631eb3ad33c49d4a1b"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e7573d898cb423033e9911ec7fb9187e"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0759a1b969cc140b3ae9feea88591f9d"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "947930f0205f85c5c8c61a36aeb35ffc"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5d18c9d149efc77de5f9421d9b44bb22"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "e2d6380a58222e34ce9c06b7af90280b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6a3901c9d7af1a194515284ac2096183"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "f1002643ca9bfc00c74ffdce841087c8"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3acbcf399e3fe5f42673ed0564f310ce"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1092f871f710565665c8daf0829acd37"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "1dadea3532207da357aa947d39212d09"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "60e707bcec4083cdd35f328a0f99a02d"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5e06ed26193bea705ab1e2f4736c4d8f"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "9c5516e5f6aa2385f84f8edabc9b6192"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6e53263ee29df189f88e6adf33b49bf5"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "071709a06f65081a39cc06cde037ef0a"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "41838616045526af595a4bb3e8dcd204"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b112393a805a4a4e7f4c307a0e814575"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "837d3ea366d88c7354d834c663fb18e9"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "14b897d9ab89fe2695d9af1e0fbdd707"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "5bac6bbc10e1d8c9486b032c34d1038e"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 3686400
}
],
"md5sum": "c22f88b72615118b0d76e9ab0c409eee"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "deeb5be153090be234deedbdea5b6063"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "c8c177a370c48a75613f06ca682fea7f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "60a7fc380314b5df437c236b2892cc41"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e2858c254c831747560d288b9ddfa0cf"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "5c7a04eeec3853dc83122b357dfbb4be"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8ef9a2cd97be52857cf4ebc3c017d163"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4712984f5dd02bc06a2dbfeb385b6a63"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "2032d57d68a4d45faba8d48fb6afc73c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9ba7800938b2b35d294a20aab7ef9b0a"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "95fe1599046894d2b57b6db0a9051541"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "45405f9558801a94d837608f1d5aef56"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3c57aea41862423bd0b0606a2e8c201a"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "ae7e901a2850eb04172cd97222194bbd"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9436ca36c4376899432be51254ccd8ab"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "f32424cacc6ec53728eb5c191b65ce5c"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a4bf6ffed9432d6d4d9f6677021a8272"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f92ac9627f149008e2d5eb8a11ada3dd"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "e38c087a12cef0dc708b13a21f27b0e3"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5691c56eabec2ca8cdad3adfa76e0a96"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "983fa71d2a9f875cd06c46b5a602e5f2"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "ebb3814cedabd03b98bc24d9af03d553"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d810fcbd7d0e0a68f27d6254c3ad444f"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8d17bf825a2941b0bc83dc94b2a24622"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "4069ca2f745d79f4f30923e28b1ed1ed"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "baf47dd069e7563fac316d204e0223e2"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "28cc63489674421b07c5e3199c5af1f2"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bdab9166b426f3ad986e0bf424840423"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "f7407f85121a36b228ca76630ed58ea9"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6faa00f5fd21ed2ff8732b5233b83cc8"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b4a9125428319d11273d72b51735ab89"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "a70ddf1853a0aa3d6499f4607f1ae308"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "46bbd55393846d8485bbee6eaf0b4579"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 32505856,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 18350080
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 30932992
}
],
"md5sum": "b522d2fdbfb9f16d26b7e480ac87892f"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 9437184,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1048576,
"byteOffset": 8388608
}
],
"md5sum": "4583be86fafd2aeeb6cb1d3acddeef02"
}
]
}