Qwen2.5-7B-Instruct-q4f16_ft-MLC / ndarray-cache.json
dusty-nv's picture
Upload folder using huggingface_hub
e6e6c18 verified
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 3879214080.0,
"BitsPerParam": 4.07500989461587
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "e67c832b0e8b5c4ae277c0c4752a654b"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "8aaffd7cef0304a9b56e761087309751"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "b114cdb812dc650ac33d978059f845c4"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "971659de687778fd2d064d1d2b6bdc75"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "738091766ce04bb71782e3e9f0ffffca"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "78c6b522c9ac96c582fc37ddf65dae4d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "d3a482128d490d5fc280de6faabed558"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "42c60ac1ab1044da94ab5bde37342a85"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "9bc16ed6bf32fc0d31b02841d6968ff4"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29719552,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7168
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14336
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 21504
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 97280
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 104448
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 113664
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8371200
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8380416
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14802944
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14810112
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14817280
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14824448
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14900224
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14907392
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14916608
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23174144
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23183360
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29605888
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29613056
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29620224
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29627392
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29703168
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29710336
}
],
"md5sum": "36013e8acffd8602acab9e5bdceb11b0"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "1f4129a3f079b5c2b70f4ab95d7595c7"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "602eeb671e2203a1c94a70073396807c"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5fd55a60e06ead1d4890c5af2288ffa7"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "069bdee321a12786c3721cfe2d2d60aa"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "fc90e93d8d1d0293da441810cf9eccfb"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "1d340c9328e65098be1f7a6a9e21ed9e"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "8009ea47b18c21b2da5f5f0ef4891cae"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "bc151229a8af7fcb426758ef86d1fd35"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e7a96e8f5cb1aeab2b61aa96dfc698a1"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "98f31d5624cb1115a7296a985da53a66"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "35f9010f951841a9cdb5c682e8fa8643"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29613056,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14710784
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14717952
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14793728
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14800896
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14810112
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23067648
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23076864
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29513728
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29520896
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29596672
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29603840
}
],
"md5sum": "65b57647aa1eb48370b9ff9710cafea5"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "41ae03bd5014b840c52c204ac9ff6ce4"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2d99bd13fb36002d031ca05e8f01fad1"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "801c74407aa1286cd07c5b62e4e8b2f1"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "09b6aa5a514c70cc40887e744e1afdb9"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "55bdd7eef38416482cbf5fe1d1af94d6"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d5a3c8a5859e65a1978daec2f15df7d9"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7b7891af9f77dbcd5bb33805145104a6"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "8172e6221ead03cd0c1be64ca5ce389f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "8bd5c6f650dfa6db9df6912c0737754a"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "eea925bcd381e5f820162fdf94e7c846"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "a37f92a3d738e06114948e402f22f921"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2b2854419f911014dfaac60dd4e57991"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 29522944,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14710784
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14720000
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 22977536
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 22986752
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29409280
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29416448
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29423616
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29430784
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29513728
}
],
"md5sum": "79c4e8f61024a4506e40732603b021be"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d8c2cac00be25768ec50fccf229d71ca"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "1829fdb4763feae50a2877c863918aad"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "272f8486daf51567d160cd192ed1624c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "c2ce108ccf36391a7f51d1393f2f6551"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "b8e28d62962ec7e1bc9c0234663ea089"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d409c9ea05d4219a31f7f3a9fdf6ab89"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7c4fa4ad19488c873315be3eb96c646d"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "3428efc520365f08c1cb7157d949813c"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29598720,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29506560
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29589504
}
],
"md5sum": "aeefc44a38ffe316572b5f16c8fecc6e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "0960a1f95c3601bf252920f47ee0962b"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "9b78ee9b01319380bc9551fa95849d63"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "45e932f5fbc07de42552c58eece95dcb"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "d638935143ce8e9dd4cc57ad05ec96e9"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "eb8d5a02313990a9ec7ad23953fd1cbb"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "d23632a3a1089a16b9ec0a13a22a1ce9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 29688832,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14703616
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14793728
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14869504
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14876672
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14885888
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23143424
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23152640
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29596672
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29672448
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29679616
}
],
"md5sum": "8b99428d3d61592c0c2a635e98205d05"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "e027ce95a18e2974e4cefe0f5df835f1"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "a7f6d9f007907882cd25d52b2348f312"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "3b4bf8a0756727e2f5e853364e03c4fb"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "89342d044dc9af890b4208f574e1883b"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "f7f836517c543ff6dd9c1f4f6eb8659e"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 29613056,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29513728
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29520896
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29596672
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29603840
}
],
"md5sum": "a85a98866b340235e069025aa08458a9"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "2efc25421b1ac333e97254da97a8344b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e032974147334a7a0b98656a24cb662c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "0d33b6d10b82e4e9d71dece9f13e1cd3"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "4a1519942a45a3e2c2deddc29c2cdf28"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "ab7331ff34701ba080cc57c66378792b"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6fd87d6696b54b0b5fd0dcf8fcb508c3"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e2e1214ca04671489d17073c439de25e"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "ee28b730987b7fde90c7e90a7df845df"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "f02b99d8df443964b36a0d0c0211cc06"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "7e3b211c2877245e5733fbfe64581ccb"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "adae781a50202954d476d2dafb807113"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "fbd7b62262b0819c9bcf87d14080fe4f"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "fc3e88d1a371c9f7896a7669e2e922b0"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "ca913b756d51aa984983cdfc2781bfa2"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14703616
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14710784
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29499392
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29506560
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29513728
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "bb7c72f0895d6c864423d9b09e4e0d8d"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "de0ae8f1303ae31861139ab685a369ef"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 29492224,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14696448
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14703616
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14786560
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14795776
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23053312
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23062528
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29485056
}
],
"md5sum": "51d36f80c7fe6386aaae33b50702aa9d"
}
]
}