diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,6095 @@ +{ + "metadata": { + "ParamSize": 445, + "ParamBytes": 8858030080.0, + "BitsPerParam": 5.001961295228238 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "9924dae6de4c54615afce8ae05066461" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "88339473662d52ec1a37a2715afa0b85" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ee9431a17d9b83b4bdd98e97aa2439d2" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "537211d63edb83dbf6c568af82f95285" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "transformer.wte.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "8b8cfdbe3a3a677776ff7a9e363ce0d9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "transformer.wte.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "324e084c2d9e2c6461b1c42dc465f5e2" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.0.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "6b72494212f00e7a6a750a4eccc441c3" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d7252f03f3647a12fc5ec786b1b9a12b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 32890880, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 30720 + }, + { + "name": "transformer.h.0.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4945920 + }, + { + "name": "transformer.h.0.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18053120 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19691520 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19701760 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 19712000 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 28477440 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32860160 + } + ], + "md5sum": "c7bdc4e1ba73d234884b5aed1a31c56f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.1.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "c88328f032eb0d236094664130f7e24c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "ad37a47ca1b0367bfeb0e8d8708c774c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.2.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1ac414f22bea070f0727918b198f964f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.1.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.1.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "94f2eb0ea786ad2679fa8773488228ba" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.2.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b7e6cb2dab84c79b9d2368f1ce6ddaa1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "82b55b58573f63d67f067cb09799cabb" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b4ad8c927092b1f9eec186dc3f8b0576" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.2.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.2.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.2.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "67eb261d0fa2d11b94e0e908a843b0f7" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.3.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "85da5f11de4fe3d28f4894506f84167d" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4ae56825e933c9f7f701786fe0f91d61" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e0f3a0f7f97375f623c60a8f338539f0" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.3.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "0cabafe99e059ec0c69286b251dce55c" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.10.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3ad33a1e132105e97744449d5af46a72" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "7836627647ca04d6822f13a912a83768" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.11.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d81d831128a08f49c89ee5e2c700a854" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.10.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.10.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "5f5d334660fad65b7dc6480e89b8b6bf" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.11.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "6a17a528a479b7ba9de612572d9b4eeb" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "bbd5157c59c410297382b4bee47cbdd7" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0a6029076418c278d9264446b9a2fa46" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.11.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.11.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.11.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "602b76b5dbc191663ec7d29a4e36a23d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.12.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "9bfb30b01eec70d50a0da68e565a85d2" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "04af42e46111ca664e57348bbd969147" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.7.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4772f5cb0dbc58d77ff1fda7f01cbc33" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32880640, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.12.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32839680 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32849920 + } + ], + "md5sum": "7e9e42e0e0bfaf0294428fdeb32eff1a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.7.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "154ec2129f5008be74c9c88473da31c0" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4bc9664640c048363775de60eceb6729" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ddb8f93509ddf4704b40d002e4328522" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.7.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.7.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "771dbf90de3956bb58c84fea5abd6933" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.8.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "58e190e766c68e54279068b2c01b89d4" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "b558e342db5bf000866ecb38b2c33fc7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "76d528af3ab61882d04c14129ecbaa7e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.8.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.8.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "b41c1d35686cde59a2594e504fcc3a68" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.9.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5bbdc0e323687d2d011e0ceb47faff8b" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "8ec7b2f9c10945b4b1f5e2b28652160b" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0187d5461db122e76cbfdd2d33040a0f" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.9.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.9.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "de93cf594921a28049f9bd86cefbaafa" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.13.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5ea625bda03ef285ff47510e141625d2" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "dbb88e60327cf494ca34956740652df0" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.14.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e246abbdcc9023d7bbbe14bfa4841570" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.13.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.13.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "22c9e2927398e2e24389a6310bda41c9" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.14.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "56e08c3d62cda55baad64ea4ee55d184" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "5d6c612b1bfe9459f92250d67d37795b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5733ef99fbefae9c94aa5491d39f2e19" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.14.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.14.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.14.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "362560a5cbc994a8e0c475729b81ed06" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.15.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "49edbb03ad45d41e702e24c775ef299a" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "9795df5a2039599f344afc928abd7b72" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f49ac286f6a9b18339cd4966906cddc4" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.15.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "1ee346cd515412f74afea6375906e886" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.16.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "7d118c520c50f57c6169b1ed98fcce7a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "84a6024bdf36187d58c1ad3feec3d56b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.17.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b2f508e836552e854a071023b55f65cb" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.16.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.16.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "3c414cb65c4e802ff78b9bc15346c424" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.17.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "f393b00ad4b314d17c425b1c22096596" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f0ddd28a7cd4e9d79ee46df0655313bd" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1fc631e642c4d83f265935a8b8f68fb9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.17.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.17.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.17.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "7d8446174811d00ca88eff6df93bc597" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.18.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3fb69fced6c97e4236099245d12eb426" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "17c8ff566e42467494d562755f609170" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1566725ff59a53395373f4574cb2313e" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.18.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "e8fcbc8a1d5a277e398e92cc7a677a1e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.19.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5c1a95952256a5498ccfb94b5aa2260d" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4423eb0247c9a543c5a79462fd87e0eb" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.20.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "03810ea94fd581815cb70d30e3dc7b4a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.19.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.19.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "86a515e5a725863863593968efc56935" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.20.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "afe25494a905fadbb88c3a9645eb27e6" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "e771c7c8760b79e591ae5880afc0053d" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cb46f1d8d5bba9c760387b07b78187e9" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.20.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.20.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.20.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "d2bfa31f4edbe05c06ff2d4cd81fb390" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.21.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3f093ea1a6ba9ddc1269e2951e893335" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4a129e267ecde19993252b11bc092c4c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3c100126712a1bec0b381d503bbfbf72" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.21.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "41b612ce189209b218b81314037635ef" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.22.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "a56418a34f4e4c70eb97735631808123" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "94a1350b04b471f47bc8270d65fb627b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.23.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "efd753fef61814cba77b344cc53b9212" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.22.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.22.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "a1bab9dc8d12061a979d585126bc2471" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.23.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b965a19263720dc3c853c2c12f665980" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "32127bcd4d4ea161cc8ac91cd8d82286" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "fc1a918f79cf48da60dc3d54822b4471" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.23.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.23.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.23.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.24.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "15d2637387a9a4b6ac34e7471a6b0dfd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.24.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "42e337db1656cd9191ffa874d3860c5c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f643f01316fd7637f83979fe4f4a5994" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "be68d4c2fad011d74f7b2e5e46e797f0" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.24.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.24.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.24.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.24.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.24.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.25.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.25.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "e954fb0120a16a7fa91a8ddf254433e3" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.25.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "68d175eee215a0a5b7110b5875d5ec71" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "41aa78ae12b594fa23df8aba895eabff" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.26.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8281507a523934f0591ef7bfb71244dc" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.25.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.25.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.25.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.26.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "d837253c7b99f6629e61910c3df01017" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.26.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "47a84cd555481d8fbb57054ddfffeb58" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "6c83a5f59329e0923b312c830379b0e0" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ebb58a78a014c8c594bcf18d30c7b4ad" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.26.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.26.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.26.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.26.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.26.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.27.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "18a7df21df7532130e0ce3e3a0f30199" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.27.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "e1c96efddfa353a029ab00637cace2b2" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "6317fd6cd8df92618465ba5779ff7044" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "37eb4e615b7b59861439ac97dcb03538" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.27.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.27.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.27.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.27.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.28.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.28.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "c37ad6ba8dcc374a9ab9e1943379708b" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.28.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "423378676d449ae03b82b060328312bc" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "e63e805c892c5dd63da704e3d4d60df4" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.29.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "326543bb857bbda805c00f686f2efdda" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.28.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.28.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.28.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.28.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.29.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "e7a30740f1cfaaebc41ff2cc7f9cf35d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.29.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "9a87770823982848b2a8d785c0467ef7" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "9b3b9f366542d5958b5a7632da0d43d6" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cf798b227f9a5d6b51a6fca58a3ab76c" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.29.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.29.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.29.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.29.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.29.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.30.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "9370eee805f98c513e84dd73f7aad115" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.30.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "a09dce6ceaf4955a4a029eacbc9262b2" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f608e1f10a13c182a7e1f6812a14d806" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "66d82ee61d40b5f9cb0577a531896147" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.30.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.30.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.30.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.30.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.30.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.31.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.31.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "c9568a1b04035b1eb5415d7b395f0ba4" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.31.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b3b3f1b7845063465589a8ef2a8a80d4" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "c75969c1dc97ae3ab733a26ef0352de7" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.32.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "df74d11b514a054e9c68c5f7f4c5f15a" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.31.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.31.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.31.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.32.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "27ead50f2cd3e52b785c732da0040830" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.32.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "be192cb6d159f7813a0cf5e8dd5178c5" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "82cc8a93a394e6059d7aca306308f3a5" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e848fd18de8c098e8e90b380a51983e3" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.32.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.32.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.32.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.32.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.32.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.32.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.33.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "4ec3e90791139e43bbb94e490ff523a1" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.33.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "fc6aac577987525f855681b3755cf0f1" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "63687adca714c6861265508d9d8aba1d" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2c38fe49bb49dbb4a320ded823e8ba8d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.33.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.33.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.33.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.33.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.33.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.34.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.34.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "ec4e8c3f94e3afd854a8b38a35d72cb9" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.34.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "e359cbd903d54240a0d67cb9a14f15be" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "42f303933d372455229d8a42135eae8d" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.35.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8c2dcd44bc8be6358de5b840f1e77d4b" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.34.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.34.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.34.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.34.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.35.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "b109893d7101eb36cd1b8297701aa8fe" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.35.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "af9d0b5ccd5033274aa007467538179e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "30b980280c0ebf3ba406bd3557a5b6d4" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0c98602a827b55ca3a3be7b424ea9ac3" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.35.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.35.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.35.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.35.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.35.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.35.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.36.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "1f68e15c80896acb7699d006c4174685" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.36.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "224fc0184765b20a54c7d8749b6f886b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "5cc2ef522e76e2a8a825634e849290e9" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "19ee4f92095f637d12199f4f0fd576db" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.36.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.36.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.36.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.36.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.36.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.37.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.37.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "0f506a1f65edb8a9306080bb4000726f" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.37.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "f296779fdfe39794398505fe591fad86" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "38dd99e729075fc31c1013882ba69c50" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.38.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f8125b6314383c8ed6c47966d92632d8" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.37.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.37.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.37.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.37.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.38.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "d1ee4b7e370ecea81645b52ac4bc713e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.38.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "8ea1af26e5c68b71aab2e42b61394a87" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "44090d401acf8796c78359475002a704" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.39.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "42fa7de06043cc49ff3e0b234a5ed40f" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.38.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.38.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.38.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.38.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.38.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.38.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.39.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "abf50d18a699519ae9ffa643c00e894b" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.39.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b8655dff40941641c56e4accfb3801e6" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "186a8808fbab298e1090750e54222f32" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "96889b9c475223619d7514f55ef9df72" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.39.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.39.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.39.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.39.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.39.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.39.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "335c2c770ef99da13a11bdc59e389087" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.4.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "490eda10b06c0404a0e31cbc1a6de754" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "da6da39bbfdbf4e3d20673d000570662" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.5.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2bfc228f3eb02dbcc948a4ea5aed8da9" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.4.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.4.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "18c52e7d8383b663da1178008b601ebd" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.5.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3eef47186928c5f7abeeb78e6821339c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "a558a3d56fb45775daf4c8b9ef6e5603" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f4139cd0409b2cd37ee064c78c0f34cf" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.5.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.5.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.5.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "c91a62585c1792f52555e74075479d61" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.6.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "34e32044b39fde438ec4f57083d841c2" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "d2fffe239acd9ec6fa731dbfef9b6300" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 32839680, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.6.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.6.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + } + ], + "md5sum": "d9df0d9beaa754df73105d78d630bf9d" + } + ] +} \ No newline at end of file