|
{ |
|
"metadata": { |
|
"ParamSize": 269, |
|
"ParamBytes": 1033572352.0, |
|
"BitsPerParam": 4.501551474039708 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 155582464, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
256, |
|
151936 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 155582464, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "55d817231a8cfb46e80892f42f7486ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28901376, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
64, |
|
151936 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 19447808, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19447808 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19460096 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25751552 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26537984 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28635136 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28897280 |
|
} |
|
], |
|
"md5sum": "544f6005e17ec487fd384be0115ea74c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "003faeb5d32289296b8fb6f1c0a360ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "6389d3daaab1740671c0dcc0ae14e5af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "8ce342cbb7717f5749828e5850d6dfc1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "5e2a13838d1ec9b41a248975eff518cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "819864e69047f88ccf40b00d1110d54e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "43bc6e38b4026d041510933119b6d841" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "515ebbab4e302aa668094c8b7841effe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "ab04377343cac28463bbc4f1a6a1b3e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "0542605592ffbe38e318a1ad1e627a3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28483584, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19025920 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19038208 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25329664 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26116096 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28213248 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28479488 |
|
} |
|
], |
|
"md5sum": "37e60dcede917d8b567dcf183b8f6fc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "506d468c4be56c6931de2181c0e568ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "3ef6f46eacceb2b8f0f6bfe3e0533728" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "11d8caa859eb3037bda85f770b6a3622" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "4c541ff06cef85d6a0e57ade16974aa8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "52c0f30ffc4c941bd17767bb30260b6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28483584, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19025920 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19038208 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25329664 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26116096 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28213248 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28479488 |
|
} |
|
], |
|
"md5sum": "4451758a6feb0da995bcff710427eb8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "067eba818ad7ce39c5b1a5c1c962a370" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "8f79e478fb710d8ba8083d492a6705cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "27a2164ab34e2d36ebe6ad2dfc7f52e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "26def18cdc64becee6079e2a16295b4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "7c0ac19942d64b79a4fd2ddd8f3f771e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "dc0d14465ae623462b7ba730fc5a9f0e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28479488, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.q_weight", |
|
"shape": [ |
|
256, |
|
6144 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.q_scale", |
|
"shape": [ |
|
64, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 786432, |
|
"byteOffset": 25325568 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.q_weight", |
|
"shape": [ |
|
256, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26112000 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28209152 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28471296 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28475392 |
|
} |
|
], |
|
"md5sum": "1ed3df499c4a17db99c05a1e35062e88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 155582464, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.q_weight", |
|
"shape": [ |
|
151936, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 155582464, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b1ffdae0c8efb2938a2acbb5235e601" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 19447808, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.q_scale", |
|
"shape": [ |
|
151936, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 19447808, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad620bed8cb3980cf1f2a9a6a43de187" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 19021824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.q_weight", |
|
"shape": [ |
|
688, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5636096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.q_scale", |
|
"shape": [ |
|
172, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 704512, |
|
"byteOffset": 5636096 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
256, |
|
11008 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 6340608 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
11008 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1409024, |
|
"byteOffset": 17612800 |
|
} |
|
], |
|
"md5sum": "95fee7353e4ba5d43350cfa8b51154ea" |
|
} |
|
] |
|
} |