Qwen1.5-0.5B-Chat-q3f16_1-MLC / ndarray-cache.json
Tlopex's picture
Initial commit
ce9efa6 verified
raw
history blame
103 kB
{
"metadata": {
"ParamSize": 269,
"ParamBytes": 283132928.0,
"BitsPerParam": 3.655862583030465
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 63205376,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
151936,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 63205376,
"byteOffset": 0
}
],
"md5sum": "e7b49b4c6ba0344356fbdcdec46233be"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 63205376,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 63205376,
"byteOffset": 0
}
],
"md5sum": "e7b49b4c6ba0344356fbdcdec46233be"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33418240,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
151936,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7900672,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7900672,
"byteOffset": 7900672
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15801344
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 15803392
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 16966656
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 17112064
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 19454976
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 19747840
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19749888
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 19756032
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 21033984
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 21193728
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 21619712
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21672960
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 21675008
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 22838272
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 22983680
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 25326592
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25619456
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25621504
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 25627648
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 26905600
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 27065344
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 27491328
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27544576
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 27546624
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 28709888
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 28855296
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 31198208
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31491072
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31493120
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 31499264
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 32777216
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 32936960
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 33362944
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 33416192
}
],
"md5sum": "e4b6c5f9d9a8eb8813e4d4f88846ba33"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33310720,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 1163264
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 1308672
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 3651584
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3944448
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3946496
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 3952640
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 5230592
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 5390336
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 5816320
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5869568
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 5871616
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 7034880
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 7180288
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 9523200
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 9816064
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9818112
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 9824256
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 11102208
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 11261952
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 11687936
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 11741184
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 11743232
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 12906496
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 13051904
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 15394816
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15687680
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15689728
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 15695872
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 16973824
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 17133568
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 17559552
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17612800
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 17614848
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 18778112
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 18923520
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 21266432
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21559296
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21561344
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 21567488
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 22845440
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 23005184
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 23431168
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 23484416
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 23486464
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 24649728
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 24795136
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 27138048
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27430912
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 27432960
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 27439104
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 28717056
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 28876800
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 29302784
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 29356032
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 29358080
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 30521344
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 30666752
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 33009664
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 33302528
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33304576
}
],
"md5sum": "4fb729f6a7bc624aa0f7422ac38cfcef"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32585728,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 1277952
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 1437696
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 1863680
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 1916928
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 1918976
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 3082240
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 3227648
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 5570560
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5863424
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 5865472
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 5871616
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 7149568
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 7309312
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 7735296
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7788544
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 7790592
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 8953856
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 9099264
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 11442176
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 11735040
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 11737088
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 11743232
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 13021184
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 13180928
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 13606912
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 13660160
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 13662208
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 14825472
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 14970880
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 17313792
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17606656
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17608704
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 17614848
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 18892800
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 19052544
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 19478528
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 19531776
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 19533824
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 20697088
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 20842496
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 23185408
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 23478272
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23480320
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 23486464
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 24764416
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 24924160
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 25350144
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25403392
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 25405440
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 26568704
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 26714112
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 29057024
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 29349888
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 29351936
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 29358080
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 30636032
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 30795776
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 31221760
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31275008
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 31277056
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 32440320
}
],
"md5sum": "8572c24763eba76e31a9e9087aedc3bd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33439744,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 2342912
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2635776
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 2637824
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 2643968
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 3921920
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 4081664
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 4507648
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 4560896
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 4562944
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 5726208
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 5871616
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 8214528
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8507392
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 8509440
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 8515584
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 9793536
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 9953280
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 10379264
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10432512
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 10434560
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 11597824
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 11743232
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 14086144
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 14379008
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14381056
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 14387200
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 15665152
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 15824896
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 16250880
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16304128
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 16306176
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 17469440
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 17614848
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 19957760
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 20250624
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 20252672
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 20258816
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 21536768
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 21696512
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 22122496
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22175744
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 22177792
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 23341056
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 23486464
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 25829376
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26122240
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 26124288
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 26130432
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 27408384
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 27568128
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 27994112
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 28047360
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 28049408
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 29212672
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 29358080
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 31700992
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31993856
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31995904
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 32002048
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 33280000
}
],
"md5sum": "c3e18cf55248dd1d744e5c930cfd7468"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 23967744,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 425984
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 479232
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 481280
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 1644544
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 1789952
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 4132864
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 4425728
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 4427776
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 4433920
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 5711872
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 5871616
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 6297600
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 6350848
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 6352896
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 7516160
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 7661568
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 10004480
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10297344
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 10299392
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 10305536
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 11583488
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 11743232
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 12169216
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12222464
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 12224512
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 13387776
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 13533184
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 15876096
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16168960
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 16171008
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 16177152
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 17455104
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 17614848
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 18040832
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18094080
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1024,
284
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1163264,
"byteOffset": 18096128
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1024,
71
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 145408,
"byteOffset": 19259392
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
5632,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2342912,
"byteOffset": 19404800
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
5632,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 292864,
"byteOffset": 21747712
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22040576
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 22042624
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
3072,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1277952,
"byteOffset": 22048768
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
3072,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 159744,
"byteOffset": 23326720
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
1024,
104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 425984,
"byteOffset": 23486464
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1024,
26
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 53248,
"byteOffset": 23912448
},
{
"name": "model.norm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 23965696
}
],
"md5sum": "21d7d4b461aa432cc38af98cc018f736"
}
]
}