dusty-nv's picture
Upload folder using huggingface_hub
66a12a2 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4083949568.0,
"BitsPerParam": 4.068559606592764
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "a035ab83efd6411bc34980ea5f43c41a"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2d2557ddb0f2f1f8946aadef17349873"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "f11702c20f052d8c5486bb8fe8390333"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 32833536,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
128256,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32833536,
"byteOffset": 0
}
],
"md5sum": "6b34f84f2c250bfef16ebcebed38f243"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7cf2c33ea9d38e90b453280acaa755f7"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b498e9473b8ba30abd79e69cfd2b5f09"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32948224,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128256,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32833536,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32833536
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32841728
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32849920
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32858112
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32866304
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32874496
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 32882688
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32940032
}
],
"md5sum": "c562880447756ddd3c2dedfb27f02dc6"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "33f62039960ba8d4cc13fbbdb7d510a0"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0cca5c027dfdcd137d6f661e64d92316"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "84e3af93937965b4ca80b24ea487a06a"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "42f71545f3d4b04a21ba4192d7945c59"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f239440619320035aaf8179bb7b04da6"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "a279f14ed08ade715ee375e4816636f1"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f24bc0dafc1eb278c73b74783421f948"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a43e86165e9ec294022d08fd17eedde9"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "7ef823dd057d56b756c35e575cad0835"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "07c93cc6054783e4f7215506a68513a6"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3714232feca5ee84b733a3b69b1f0bd8"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "d2e0c2f186354a7e2adce5a0ec9fa527"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "88c9f8bf631d75506472bcd6b61ffe28"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a94acece40b08935045bc9ade474f9e6"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "5bbade8666b37be0f39c5673a5d2267d"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9837d0052962c610b9d1d8fce5df772e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5c1627554bfef5348ba81d1356706cdb"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "78b5b7082d3f5ab3892660e9b329eb7d"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d8cff624fb18d5db4de2b1ca252fa0c8"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "933f42ae0fcfd2c96923f3d6765b7b11"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "03a35fda62120d81f918a56eee00e9ef"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f9d29a67f70c795cdbb45233f5ab394e"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "910fe5aa08d88ff774e165d1dea82555"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "69accf34bba156bc5d3d34df105f3af9"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "43db511d390874f336750e1962f829b6"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "77d3b7e717c486e5efc8ab654cd1c445"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "578406bf82b230692d87069da300f86f"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1ca78d7dfe70602c898eb6abe7807391"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e3fcbbfcce7a62d95371b9f2aa66441f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "e8b2c549fbfc49e0763e0f25336184cc"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "eeb26e8425acd3eb80125c215a0867ba"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "71d3c88ea5b7cdaadc55fcdf91acfcf8"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "b6859d04e254206e4646b64f723e77a8"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5a9e31c61d1caf34aa3e89b8f7d52552"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c05974c01d17678e7c87f9806113cd89"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "87f570fe000b2fac87c5601a16d94e58"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a439be50757c156067f2245791f80fe0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0ddc9655280a182d2046c367e3d1b247"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "594251b4b8ebc3f73281b115d9b1233b"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a8d9ff72cceda1d48adca17f3eba8ab6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "aed22598c8dcd4bb81a77be2f67b9fa3"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "6563a769655ac5afb503fa41f1fe9e2b"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6e08783ff54c41009ed303a016f64c5e"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "df12dd3b97570110de6756e68bb7a906"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "2ad00a735c8412751b439030366ab068"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ff6f2f60a3a3abce91feffb236b82897"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cf676ad26b015cd0f83a2c07be5c98b4"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "2254c125d4019fb555641e5e53bad604"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7a9665ae23fe81d11039e86d02e88fed"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e5e762e7a161f7e08728eefe78477f46"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "96e38a773df75b57fa1d82282054d0c3"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e48b416d7b57ef5352e3f69c7841bf0c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a7c686bdb500f448c51976de816c8d5a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "2bc28d3d225a6b81f24f1db1c42a7d96"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0c949cdabb82f0ec6fb0aea9531ab16d"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 21049344,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
}
],
"md5sum": "63694461e15a1456a2850ff567bd66d2"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "aea29da58f280f022ac3285c57244e35"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0ea4e42f71b515fae6f7434faadfb4c7"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "04c64bbb8c39f8e4c6dac8d8ce4b999e"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "76f1f76d14d5469308a6cedca7ab6c7a"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f5675dde22ba1ab97a897dbbe1d120d0"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3e5ede8bca49b0f17cdbb8a055f0f659"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 21098496,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21008384
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21016576
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21024768
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21032960
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21090304
}
],
"md5sum": "cc04b13d85808506fab718f9f1c2dd5f"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2fb832003b9a538fb4901d7fb9fa4579"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1a5ea836991025e0684d700efaa6385e"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "aa6b89174bacf683909943e8c945ddda"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "205962a0a997bec08499f27e02626cb2"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c6151b94e20eabd40df6fe866b38559e"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "4ffa93f7058e7a2e49c73b3894b75d85"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "99cc3fe8ed729fc4b201db27edf565f8"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3777b7b133c3394f3018bdcbe7f3c364"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "92f9ba489c38838b7da44dc667f82420"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a03c8b19efaaf91ef80a9b0ea64472ab"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "55f3b03e12ee844a65c66a29f8af4e61"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "75f4ba944388a9963e51bf4d695f63f0"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0ac001877b8c3642776aa34341ed4ad3"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8029c5abb5f48de55a688931809cff82"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "5e8678a58df90e13a36e47e8eb28ca92"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "463e664ea123ba2414f716b179c03cc5"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "451e358fc9aca04c0bebb88384404dcc"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "84e793c8870dab6411e930f540c859c0"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "46fe746fe73ac7691b5cad13d7c20628"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b2af987605c469f444fd6f8838a11683"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "706c2de9aab3850c3785fc0524a18a75"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a61ae64e5afcad353e4a2d40f81c61df"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "be009fbf46b8a28b8bca1986b70df1d9"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "9614fad182558e1221435480dd3128f6"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "80cd5cec42864b0aaca7448dc9a21148"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "883477d14b2fb61820077b917b55e289"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20992000
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21000192
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 21008384
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "245fc98749303473f0c85f7f4ce6f274"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "424bc4c52a7a8d30b0eab8de2c5920ed"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 21049344,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
}
],
"md5sum": "4ac64a764627c276906e1e0d69fa617d"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 20992000,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
}
],
"md5sum": "cdbc0c3f5c671a05abe21ac23b71ebe2"
}
]
}