Phi-3.5-mini-instruct-q4f32_1-MLC / ndarray-cache-b16.json
mengshyu's picture
Upload folder using huggingface_hub
c9c2756 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 2388848640.0,
"BitsPerParam": 5.001410952042906
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 49250304,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32064,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "3aeeb3d7e9d8e033e2294aaa3617c930"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4c6eb27388de688ccd2fa3a8ee670b51"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 23470080,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32064,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6156288,
"byteOffset": 0
},
{
"name": "transformer.h.21.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 6156288
},
{
"name": "transformer.h.21.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6162432
},
{
"name": "transformer.h.21.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 18745344
},
{
"name": "transformer.h.21.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 20318208
},
{
"name": "transformer.h.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 23463936
}
],
"md5sum": "b79b4d227556e018376ea5d6fd622968"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e2b23df08fdde2a47ae85ad21aa558a0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.21.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.21.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.22.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.22.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.22.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "725020e4e78739a8f6343cb415878cd0"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.22.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.22.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.22.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.22.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "e4ae28ecd247c29bbc4928f992d1bced"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "275662165301882e56bced0cfecdbab7"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.23.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.23.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.23.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.23.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.23.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "f45f08bcf9df989fddf13aa89358f016"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ce8fcd47094a2b30452c24ce679cbe7c"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.23.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.23.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.24.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.24.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.24.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "d09017dad54cf3f00e5e172b62477a0a"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.24.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.24.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.24.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.24.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "4c9676bece9b5e7b221a69336026a268"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0e72d98ffd8355dd7be0b1a4fcffa048"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.25.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.25.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.25.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.25.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.25.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "d09eeff14583901b180dc6eccb4ace12"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "99b504cdeea00e6028b34a65c94a9535"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.25.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.25.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.26.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.26.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.26.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "c67c1651ca7d9ca3cdcff92dbf9d50d5"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.26.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.26.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.26.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.26.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "894a738901cf878c2c8494742c86099d"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b81cca171ee50bf0c5765e60cd249633"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.27.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.27.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.27.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.27.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.27.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "cfa61f772775738e0a0353ec164421c6"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cc4e16d406507daff55b0fcf11c2be6d"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.27.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.27.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.28.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.28.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.28.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "99e55559e6cf21d7141bf69d453a6298"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.28.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.28.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.28.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.28.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "cf72032d2f29bab3fe90b58a0c58cb36"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "057b59306b4ad62ffd608fa7431df6c4"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.29.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.29.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.29.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.29.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.29.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "be5495faf43ff8ef7b41e2e1591b2f63"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "90dc59780c48c89ca310944617918b30"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.29.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.29.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.30.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.30.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.30.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "357d7ff9e326fb7afb1c19ebabb9437e"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.30.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.30.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.30.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.30.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "d650c22c656bd85695c90079e696df7d"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "20370d2b717a8e485de07646ba0e31d7"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.31.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.31.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.31.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.31.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.31.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "ca3d3d45623ec5e503780e7e08b06241"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 49250304,
"records": [
{
"name": "transformer.embd.q_weight",
"shape": [
32064,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "6151d64f2428a5c1d62203744ae7522f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 22093824,
"records": [
{
"name": "transformer.h.31.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.norm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.embd.q_scale",
"shape": [
32064,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6156288,
"byteOffset": 15931392
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 22087680
}
],
"md5sum": "3f943d23a92f58ba96157b96389476b5"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4343c908c761bf45cd71ea2342de2e9d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.0.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.0.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.0.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.0.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.0.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "c2bd6ef56d875c4f0adf154cfe47c933"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cd7f902c8eeaa61096ce9f4b28851caf"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.0.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.0.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.1.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.1.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.1.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "281620d48c1b4b25171eac94add6e6a0"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.1.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.1.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.1.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.1.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "71c800a9c8f2688b5a09fab340b952ab"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "eb56232c87f198c03c89d18e5ef36b67"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.10.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.10.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.10.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.10.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "f1697a0679ed6fa119d0f06c2d6afd9a"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1c21ba64c0480980df4feb90e466ba7c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.10.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.10.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.11.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.11.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.11.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "357ddc41322e4e58c1500316ba5eff87"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.11.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.11.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "ca57db2f19189c4ee50f1b259502345c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a0f302db7e317d5affa27c8f6c1ba903"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.12.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.12.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.12.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.12.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.12.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "7ccbda76aa15483e6a3f15eba7fc6a70"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "efa96e2bc14423470f06edd30dd8a1ee"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.12.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.12.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.13.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.13.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.13.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "4a5dc9c0b947885e1cf127639fbfe632"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.13.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.13.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.13.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.13.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "67903c92547a920e3a8160a591c51044"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "482cbcfc8565a260833ac2ef6f854dcf"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.14.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.14.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.14.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.14.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.14.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "6e5cdaaa46681ebee024b02fcc59d597"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "20f1425c7d6d4ebe6d38086e8014f8b5"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.14.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.14.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.15.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.15.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.15.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "ad9cff41d1e77691fe35d1e2d1cc288f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.15.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.15.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "53c4c98a271b4528f433486363134787"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cb442181137be51100cc66942af5d750"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.16.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.16.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.16.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.16.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.16.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "f1f65ff2578babdee1f79b9746f53777"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "43deb776cdcf11197e52daa5eaf65bb4"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.16.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.16.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.17.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.17.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.17.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "30e4627221d5cd27e21f27c419916cdc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.17.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.17.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.17.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.17.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "958f964c7c041d88ee24eaea103f9f5b"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0916edaf606a71efca575bdab3eb7fed"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.18.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.18.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.18.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.18.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.18.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "f384838842289a74a8033bbb4b0c6752"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "87402cfadbf5f167aff039e25e5bd8df"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.18.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.18.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.19.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.19.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.19.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "26e8a37ec5bffe7133a9bc27c1a78cb3"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.19.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.19.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.19.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.19.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "9298980b2969ff9185155c63f27c4833"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fe08c7935fa15b57efc3c4091e779770"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.2.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.2.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.2.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.2.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "b5de4b357981555a6ffb6b1508b80c3c"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "884ecdd4f22892d8bdcc791aa3cd6452"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.2.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.20.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.20.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.20.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "8b478873b579011c9503b8e1c18da89b"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 26548224,
"records": [
{
"name": "transformer.h.20.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.20.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.21.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 21233664
},
{
"name": "transformer.h.21.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 25952256
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 26542080
}
],
"md5sum": "d868060e32452d6ad8de1939afa175a0"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c863795534140ccc964b6a6bb5477722"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.3.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.3.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.3.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.3.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.3.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "5ee6f1c5c478e7defad5adc986d203bd"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fd4abc21b279b19fa3e7021fb1bdd6d8"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.3.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.3.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.4.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.4.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.4.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "92cf09176b4af272ff88b4d770bebb49"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.4.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.4.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.4.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.4.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "f2576337c4f7e49b7f289cb52ca9691c"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4aad834b4bc23460843517ff71d6a03c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.5.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.5.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.5.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.5.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.5.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "76b77a44a3819f7b935f5503820080c9"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "28d4502b269e7fdaef6d85e023fb8181"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.5.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.5.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.6.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.6.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.6.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "122540bf729688fc534acaaf84d3a3f8"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.6.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.6.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.6.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "0a15e33fd4087a189cc2284bfdfe05b2"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "552a946908bb2a8e68df3ad4124df411"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.7.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.7.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.7.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.7.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.7.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "39c5ce0bbd57ffc7293c1f21a4340678"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d394cec2b8266f047c085ca6b27a23de"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.7.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.7.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.8.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.8.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.8.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "c702901239fe22e5e139aa2b315eb4ab"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.8.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.8.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.8.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.8.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "cea066ba9a628075f151bf691f5c0792"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "548f267eb43d829b7094b0544bc8e46e"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.9.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.9.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.9.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.9.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.9.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "91104658482ada6ec8851f5670890038"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 15925248,
"records": [
{
"name": "transformer.h.9.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.9.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1769472,
"byteOffset": 14155776
}
],
"md5sum": "acfd1a32ed7e0091ec650cbfc933b085"
}
]
}