diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..2559b11fff19de42d18e55a40c5f02002f1cbf33 --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,38 @@ +{ + "model_type": "qwen", + "quantization": "q4f32_1", + "model_config": { + "vocab_size": 152064, + "hidden_size": 5120, + "num_hidden_layers": 40, + "num_attention_heads": 40, + "layer_norm_epsilon": 1e-06, + "scale_attn_weights": true, + "kv_channels": 128, + "rotary_emb_base": 10000, + "intermediate_size": 27392, + "context_window_size": 8192, + "prefill_chunk_size": 8192, + "tensor_parallel_shards": 1 + }, + "vocab_size": 152064, + "context_window_size": 8192, + "sliding_window_size": -1, + "prefill_chunk_size": 8192, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "repetition_penalty": 1.1, + "top_p": 0.8, + "conv_template": "chatml", + "pad_token_id": 151643, + "bos_token_id": 1, + "eos_token_id": 151643, + "tokenizer_files": [ + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..58f54268dc67792eb3b88a8b9ed39934fda8e6c5 --- /dev/null +++ b/ndarray-cache-b16.json @@ -0,0 +1,6095 @@ +{ + "metadata": { + "ParamSize": 445, + "ParamBytes": 8858030080.0, + "BitsPerParam": 5.001961295228238 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "9924dae6de4c54615afce8ae05066461" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "88339473662d52ec1a37a2715afa0b85" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ee9431a17d9b83b4bdd98e97aa2439d2" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "537211d63edb83dbf6c568af82f95285" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "transformer.wte.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "8b8cfdbe3a3a677776ff7a9e363ce0d9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "transformer.wte.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "324e084c2d9e2c6461b1c42dc465f5e2" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.0.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "6b72494212f00e7a6a750a4eccc441c3" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d7252f03f3647a12fc5ec786b1b9a12b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 32890880, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 30720 + }, + { + "name": "transformer.h.0.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4945920 + }, + { + "name": "transformer.h.0.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18053120 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19691520 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19701760 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 19712000 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 28477440 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32860160 + } + ], + "md5sum": "c7bdc4e1ba73d234884b5aed1a31c56f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.1.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "c88328f032eb0d236094664130f7e24c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "ad37a47ca1b0367bfeb0e8d8708c774c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.2.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1ac414f22bea070f0727918b198f964f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.1.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.1.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "94f2eb0ea786ad2679fa8773488228ba" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.2.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b7e6cb2dab84c79b9d2368f1ce6ddaa1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "82b55b58573f63d67f067cb09799cabb" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b4ad8c927092b1f9eec186dc3f8b0576" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.2.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.2.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.2.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "67eb261d0fa2d11b94e0e908a843b0f7" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.3.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "85da5f11de4fe3d28f4894506f84167d" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4ae56825e933c9f7f701786fe0f91d61" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e0f3a0f7f97375f623c60a8f338539f0" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.3.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "0cabafe99e059ec0c69286b251dce55c" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.10.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3ad33a1e132105e97744449d5af46a72" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "7836627647ca04d6822f13a912a83768" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.11.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d81d831128a08f49c89ee5e2c700a854" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.10.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.10.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "5f5d334660fad65b7dc6480e89b8b6bf" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.11.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "6a17a528a479b7ba9de612572d9b4eeb" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "bbd5157c59c410297382b4bee47cbdd7" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0a6029076418c278d9264446b9a2fa46" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.11.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.11.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.11.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "602b76b5dbc191663ec7d29a4e36a23d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.12.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "9bfb30b01eec70d50a0da68e565a85d2" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "04af42e46111ca664e57348bbd969147" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.7.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4772f5cb0dbc58d77ff1fda7f01cbc33" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32880640, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.12.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32839680 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32849920 + } + ], + "md5sum": "7e9e42e0e0bfaf0294428fdeb32eff1a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.7.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "154ec2129f5008be74c9c88473da31c0" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4bc9664640c048363775de60eceb6729" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ddb8f93509ddf4704b40d002e4328522" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.7.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.7.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "771dbf90de3956bb58c84fea5abd6933" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.8.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "58e190e766c68e54279068b2c01b89d4" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "b558e342db5bf000866ecb38b2c33fc7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "76d528af3ab61882d04c14129ecbaa7e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.8.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.8.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "b41c1d35686cde59a2594e504fcc3a68" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.9.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5bbdc0e323687d2d011e0ceb47faff8b" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "8ec7b2f9c10945b4b1f5e2b28652160b" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0187d5461db122e76cbfdd2d33040a0f" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.9.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.9.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "de93cf594921a28049f9bd86cefbaafa" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.13.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5ea625bda03ef285ff47510e141625d2" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "dbb88e60327cf494ca34956740652df0" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.14.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e246abbdcc9023d7bbbe14bfa4841570" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.13.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.13.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "22c9e2927398e2e24389a6310bda41c9" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.14.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "56e08c3d62cda55baad64ea4ee55d184" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "5d6c612b1bfe9459f92250d67d37795b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5733ef99fbefae9c94aa5491d39f2e19" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.14.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.14.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.14.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "362560a5cbc994a8e0c475729b81ed06" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.15.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "49edbb03ad45d41e702e24c775ef299a" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "9795df5a2039599f344afc928abd7b72" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f49ac286f6a9b18339cd4966906cddc4" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.15.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "1ee346cd515412f74afea6375906e886" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.16.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "7d118c520c50f57c6169b1ed98fcce7a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "84a6024bdf36187d58c1ad3feec3d56b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.17.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b2f508e836552e854a071023b55f65cb" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.16.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.16.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "3c414cb65c4e802ff78b9bc15346c424" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.17.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "f393b00ad4b314d17c425b1c22096596" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f0ddd28a7cd4e9d79ee46df0655313bd" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1fc631e642c4d83f265935a8b8f68fb9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.17.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.17.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.17.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "7d8446174811d00ca88eff6df93bc597" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.18.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3fb69fced6c97e4236099245d12eb426" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "17c8ff566e42467494d562755f609170" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1566725ff59a53395373f4574cb2313e" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.18.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "e8fcbc8a1d5a277e398e92cc7a677a1e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.19.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5c1a95952256a5498ccfb94b5aa2260d" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4423eb0247c9a543c5a79462fd87e0eb" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.20.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "03810ea94fd581815cb70d30e3dc7b4a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.19.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.19.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "86a515e5a725863863593968efc56935" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.20.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "afe25494a905fadbb88c3a9645eb27e6" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "e771c7c8760b79e591ae5880afc0053d" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cb46f1d8d5bba9c760387b07b78187e9" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.20.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.20.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.20.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "d2bfa31f4edbe05c06ff2d4cd81fb390" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.21.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3f093ea1a6ba9ddc1269e2951e893335" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4a129e267ecde19993252b11bc092c4c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3c100126712a1bec0b381d503bbfbf72" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.21.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "41b612ce189209b218b81314037635ef" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.22.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "a56418a34f4e4c70eb97735631808123" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "94a1350b04b471f47bc8270d65fb627b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.23.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "efd753fef61814cba77b344cc53b9212" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.22.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.22.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "a1bab9dc8d12061a979d585126bc2471" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.23.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b965a19263720dc3c853c2c12f665980" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "32127bcd4d4ea161cc8ac91cd8d82286" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "fc1a918f79cf48da60dc3d54822b4471" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.23.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.23.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.23.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.24.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "15d2637387a9a4b6ac34e7471a6b0dfd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.24.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "42e337db1656cd9191ffa874d3860c5c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f643f01316fd7637f83979fe4f4a5994" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "be68d4c2fad011d74f7b2e5e46e797f0" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.24.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.24.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.24.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.24.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.24.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.25.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.25.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "e954fb0120a16a7fa91a8ddf254433e3" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.25.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "68d175eee215a0a5b7110b5875d5ec71" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "41aa78ae12b594fa23df8aba895eabff" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.26.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8281507a523934f0591ef7bfb71244dc" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.25.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.25.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.25.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.26.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "d837253c7b99f6629e61910c3df01017" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.26.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "47a84cd555481d8fbb57054ddfffeb58" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "6c83a5f59329e0923b312c830379b0e0" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ebb58a78a014c8c594bcf18d30c7b4ad" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.26.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.26.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.26.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.26.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.26.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.27.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "18a7df21df7532130e0ce3e3a0f30199" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.27.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "e1c96efddfa353a029ab00637cace2b2" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "6317fd6cd8df92618465ba5779ff7044" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "37eb4e615b7b59861439ac97dcb03538" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.27.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.27.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.27.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.27.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.28.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.28.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "c37ad6ba8dcc374a9ab9e1943379708b" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.28.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "423378676d449ae03b82b060328312bc" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "e63e805c892c5dd63da704e3d4d60df4" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.29.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "326543bb857bbda805c00f686f2efdda" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.28.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.28.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.28.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.28.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.29.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "e7a30740f1cfaaebc41ff2cc7f9cf35d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.29.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "9a87770823982848b2a8d785c0467ef7" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "9b3b9f366542d5958b5a7632da0d43d6" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cf798b227f9a5d6b51a6fca58a3ab76c" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.29.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.29.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.29.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.29.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.29.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.30.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "9370eee805f98c513e84dd73f7aad115" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.30.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "a09dce6ceaf4955a4a029eacbc9262b2" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f608e1f10a13c182a7e1f6812a14d806" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "66d82ee61d40b5f9cb0577a531896147" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.30.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.30.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.30.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.30.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.30.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.31.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.31.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "c9568a1b04035b1eb5415d7b395f0ba4" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.31.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b3b3f1b7845063465589a8ef2a8a80d4" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "c75969c1dc97ae3ab733a26ef0352de7" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.32.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "df74d11b514a054e9c68c5f7f4c5f15a" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.31.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.31.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.31.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.32.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "27ead50f2cd3e52b785c732da0040830" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.32.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "be192cb6d159f7813a0cf5e8dd5178c5" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "82cc8a93a394e6059d7aca306308f3a5" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e848fd18de8c098e8e90b380a51983e3" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.32.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.32.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.32.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.32.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.32.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.32.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.33.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "4ec3e90791139e43bbb94e490ff523a1" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.33.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "fc6aac577987525f855681b3755cf0f1" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "63687adca714c6861265508d9d8aba1d" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2c38fe49bb49dbb4a320ded823e8ba8d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.33.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.33.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.33.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.33.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.33.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.34.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.34.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "ec4e8c3f94e3afd854a8b38a35d72cb9" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.34.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "e359cbd903d54240a0d67cb9a14f15be" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "42f303933d372455229d8a42135eae8d" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.35.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8c2dcd44bc8be6358de5b840f1e77d4b" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.34.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.34.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.34.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.34.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.35.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "b109893d7101eb36cd1b8297701aa8fe" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.35.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "af9d0b5ccd5033274aa007467538179e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "30b980280c0ebf3ba406bd3557a5b6d4" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0c98602a827b55ca3a3be7b424ea9ac3" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.35.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.35.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.35.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.35.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.35.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.35.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.36.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "1f68e15c80896acb7699d006c4174685" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.36.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "224fc0184765b20a54c7d8749b6f886b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "5cc2ef522e76e2a8a825634e849290e9" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "19ee4f92095f637d12199f4f0fd576db" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.36.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.36.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.36.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.36.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.36.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.37.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.37.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "0f506a1f65edb8a9306080bb4000726f" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.37.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "f296779fdfe39794398505fe591fad86" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "38dd99e729075fc31c1013882ba69c50" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.38.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f8125b6314383c8ed6c47966d92632d8" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.37.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.37.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.37.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.37.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.38.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "d1ee4b7e370ecea81645b52ac4bc713e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.38.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "8ea1af26e5c68b71aab2e42b61394a87" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "44090d401acf8796c78359475002a704" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.39.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "42fa7de06043cc49ff3e0b234a5ed40f" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.38.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.38.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.38.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.38.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.38.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.38.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.39.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "abf50d18a699519ae9ffa643c00e894b" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.39.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b8655dff40941641c56e4accfb3801e6" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "186a8808fbab298e1090750e54222f32" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "96889b9c475223619d7514f55ef9df72" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.39.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.39.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.39.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.39.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.39.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.39.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "335c2c770ef99da13a11bdc59e389087" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.4.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "490eda10b06c0404a0e31cbc1a6de754" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "da6da39bbfdbf4e3d20673d000570662" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.5.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2bfc228f3eb02dbcc948a4ea5aed8da9" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.4.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.4.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "18c52e7d8383b663da1178008b601ebd" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.5.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3eef47186928c5f7abeeb78e6821339c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "a558a3d56fb45775daf4c8b9ef6e5603" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f4139cd0409b2cd37ee064c78c0f34cf" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.5.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.5.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.5.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "c91a62585c1792f52555e74075479d61" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.6.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "34e32044b39fde438ec4f57083d841c2" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "d2fffe239acd9ec6fa731dbfef9b6300" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 32839680, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.6.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.6.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32829440 + } + ], + "md5sum": "d9df0d9beaa754df73105d78d630bf9d" + } + ] +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..3751b51eeece484828ee684ccab458cea74916b5 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,6095 @@ +{ + "metadata": { + "ParamSize": 445, + "ParamBytes": 8858030080.0, + "BitsPerParam": 5.001961295228238 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "9924dae6de4c54615afce8ae05066461" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "88339473662d52ec1a37a2715afa0b85" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ee9431a17d9b83b4bdd98e97aa2439d2" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "537211d63edb83dbf6c568af82f95285" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "transformer.wte.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "8b8cfdbe3a3a677776ff7a9e363ce0d9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "transformer.wte.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "324e084c2d9e2c6461b1c42dc465f5e2" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.0.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "6b72494212f00e7a6a750a4eccc441c3" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d7252f03f3647a12fc5ec786b1b9a12b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 32890880, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 30720 + }, + { + "name": "transformer.h.0.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4945920 + }, + { + "name": "transformer.h.0.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18053120 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19691520 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19701760 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 19712000 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 28477440 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32860160 + } + ], + "md5sum": "c7bdc4e1ba73d234884b5aed1a31c56f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.1.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "c88328f032eb0d236094664130f7e24c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "ad37a47ca1b0367bfeb0e8d8708c774c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.2.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1ac414f22bea070f0727918b198f964f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.1.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.1.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "94f2eb0ea786ad2679fa8773488228ba" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.2.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b7e6cb2dab84c79b9d2368f1ce6ddaa1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "82b55b58573f63d67f067cb09799cabb" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b4ad8c927092b1f9eec186dc3f8b0576" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.2.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.2.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.2.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "67eb261d0fa2d11b94e0e908a843b0f7" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.3.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "85da5f11de4fe3d28f4894506f84167d" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4ae56825e933c9f7f701786fe0f91d61" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e0f3a0f7f97375f623c60a8f338539f0" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.3.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "0cabafe99e059ec0c69286b251dce55c" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.10.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3ad33a1e132105e97744449d5af46a72" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "7836627647ca04d6822f13a912a83768" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.11.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d81d831128a08f49c89ee5e2c700a854" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.10.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.10.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "5f5d334660fad65b7dc6480e89b8b6bf" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.11.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "6a17a528a479b7ba9de612572d9b4eeb" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "bbd5157c59c410297382b4bee47cbdd7" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0a6029076418c278d9264446b9a2fa46" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.11.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.11.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.11.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "602b76b5dbc191663ec7d29a4e36a23d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.12.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "9bfb30b01eec70d50a0da68e565a85d2" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "04af42e46111ca664e57348bbd969147" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.7.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4772f5cb0dbc58d77ff1fda7f01cbc33" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32880640, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.12.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32839680 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32849920 + } + ], + "md5sum": "7e9e42e0e0bfaf0294428fdeb32eff1a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.7.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "154ec2129f5008be74c9c88473da31c0" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4bc9664640c048363775de60eceb6729" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ddb8f93509ddf4704b40d002e4328522" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.7.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.7.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "771dbf90de3956bb58c84fea5abd6933" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.8.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "58e190e766c68e54279068b2c01b89d4" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "b558e342db5bf000866ecb38b2c33fc7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "76d528af3ab61882d04c14129ecbaa7e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.8.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.8.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "b41c1d35686cde59a2594e504fcc3a68" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.9.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5bbdc0e323687d2d011e0ceb47faff8b" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "8ec7b2f9c10945b4b1f5e2b28652160b" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0187d5461db122e76cbfdd2d33040a0f" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.9.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.9.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "de93cf594921a28049f9bd86cefbaafa" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.13.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5ea625bda03ef285ff47510e141625d2" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "dbb88e60327cf494ca34956740652df0" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.14.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e246abbdcc9023d7bbbe14bfa4841570" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.13.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.13.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "22c9e2927398e2e24389a6310bda41c9" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.14.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "56e08c3d62cda55baad64ea4ee55d184" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "5d6c612b1bfe9459f92250d67d37795b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5733ef99fbefae9c94aa5491d39f2e19" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.14.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.14.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.14.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "362560a5cbc994a8e0c475729b81ed06" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.15.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "49edbb03ad45d41e702e24c775ef299a" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "9795df5a2039599f344afc928abd7b72" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f49ac286f6a9b18339cd4966906cddc4" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.15.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "1ee346cd515412f74afea6375906e886" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.16.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "7d118c520c50f57c6169b1ed98fcce7a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "84a6024bdf36187d58c1ad3feec3d56b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.17.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b2f508e836552e854a071023b55f65cb" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.16.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.16.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "3c414cb65c4e802ff78b9bc15346c424" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.17.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "f393b00ad4b314d17c425b1c22096596" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f0ddd28a7cd4e9d79ee46df0655313bd" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1fc631e642c4d83f265935a8b8f68fb9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.17.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.17.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.17.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "7d8446174811d00ca88eff6df93bc597" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.18.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3fb69fced6c97e4236099245d12eb426" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "17c8ff566e42467494d562755f609170" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1566725ff59a53395373f4574cb2313e" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.18.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "e8fcbc8a1d5a277e398e92cc7a677a1e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.19.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "5c1a95952256a5498ccfb94b5aa2260d" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4423eb0247c9a543c5a79462fd87e0eb" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.20.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "03810ea94fd581815cb70d30e3dc7b4a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.19.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.19.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "86a515e5a725863863593968efc56935" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.20.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "afe25494a905fadbb88c3a9645eb27e6" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "e771c7c8760b79e591ae5880afc0053d" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cb46f1d8d5bba9c760387b07b78187e9" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.20.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.20.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.20.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "d2bfa31f4edbe05c06ff2d4cd81fb390" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.21.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3f093ea1a6ba9ddc1269e2951e893335" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "4a129e267ecde19993252b11bc092c4c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3c100126712a1bec0b381d503bbfbf72" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.21.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "41b612ce189209b218b81314037635ef" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.22.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "a56418a34f4e4c70eb97735631808123" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "94a1350b04b471f47bc8270d65fb627b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.23.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "efd753fef61814cba77b344cc53b9212" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.22.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.22.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "a1bab9dc8d12061a979d585126bc2471" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.23.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b965a19263720dc3c853c2c12f665980" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "32127bcd4d4ea161cc8ac91cd8d82286" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "fc1a918f79cf48da60dc3d54822b4471" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.23.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.23.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.23.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.24.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "15d2637387a9a4b6ac34e7471a6b0dfd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.24.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "42e337db1656cd9191ffa874d3860c5c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f643f01316fd7637f83979fe4f4a5994" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "be68d4c2fad011d74f7b2e5e46e797f0" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.24.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.24.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.24.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.24.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.24.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.25.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.25.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "e954fb0120a16a7fa91a8ddf254433e3" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.25.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "68d175eee215a0a5b7110b5875d5ec71" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "41aa78ae12b594fa23df8aba895eabff" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.26.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8281507a523934f0591ef7bfb71244dc" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.25.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.25.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.25.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.26.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "d837253c7b99f6629e61910c3df01017" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.26.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "47a84cd555481d8fbb57054ddfffeb58" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "6c83a5f59329e0923b312c830379b0e0" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ebb58a78a014c8c594bcf18d30c7b4ad" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.26.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.26.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.26.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.26.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.26.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.27.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "18a7df21df7532130e0ce3e3a0f30199" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.27.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "e1c96efddfa353a029ab00637cace2b2" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "6317fd6cd8df92618465ba5779ff7044" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "37eb4e615b7b59861439ac97dcb03538" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.27.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.27.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.27.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.27.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.28.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.28.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "c37ad6ba8dcc374a9ab9e1943379708b" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.28.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "423378676d449ae03b82b060328312bc" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "e63e805c892c5dd63da704e3d4d60df4" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.29.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "326543bb857bbda805c00f686f2efdda" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.28.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.28.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.28.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.28.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.29.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "e7a30740f1cfaaebc41ff2cc7f9cf35d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.29.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "9a87770823982848b2a8d785c0467ef7" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "9b3b9f366542d5958b5a7632da0d43d6" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cf798b227f9a5d6b51a6fca58a3ab76c" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.29.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.29.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.29.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.29.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.29.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.30.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "9370eee805f98c513e84dd73f7aad115" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.30.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "a09dce6ceaf4955a4a029eacbc9262b2" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "f608e1f10a13c182a7e1f6812a14d806" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "66d82ee61d40b5f9cb0577a531896147" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.30.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.30.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.30.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.30.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.30.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.31.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.31.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "c9568a1b04035b1eb5415d7b395f0ba4" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.31.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b3b3f1b7845063465589a8ef2a8a80d4" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "c75969c1dc97ae3ab733a26ef0352de7" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.32.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "df74d11b514a054e9c68c5f7f4c5f15a" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.31.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.31.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.31.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.32.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "27ead50f2cd3e52b785c732da0040830" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.32.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "be192cb6d159f7813a0cf5e8dd5178c5" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "82cc8a93a394e6059d7aca306308f3a5" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e848fd18de8c098e8e90b380a51983e3" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.32.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.32.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.32.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.32.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.32.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.32.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.33.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "4ec3e90791139e43bbb94e490ff523a1" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.33.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "fc6aac577987525f855681b3755cf0f1" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "63687adca714c6861265508d9d8aba1d" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2c38fe49bb49dbb4a320ded823e8ba8d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.33.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.33.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.33.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.33.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.33.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.34.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.34.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "ec4e8c3f94e3afd854a8b38a35d72cb9" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.34.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "e359cbd903d54240a0d67cb9a14f15be" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "42f303933d372455229d8a42135eae8d" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.35.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8c2dcd44bc8be6358de5b840f1e77d4b" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.34.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.34.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.34.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.34.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.35.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "b109893d7101eb36cd1b8297701aa8fe" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.35.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "af9d0b5ccd5033274aa007467538179e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "30b980280c0ebf3ba406bd3557a5b6d4" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0c98602a827b55ca3a3be7b424ea9ac3" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.35.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.35.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.35.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.35.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.35.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.35.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.36.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "1f68e15c80896acb7699d006c4174685" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.36.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "224fc0184765b20a54c7d8749b6f886b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "5cc2ef522e76e2a8a825634e849290e9" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "19ee4f92095f637d12199f4f0fd576db" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.36.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.36.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.36.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.36.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.36.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.37.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.37.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "0f506a1f65edb8a9306080bb4000726f" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.37.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "f296779fdfe39794398505fe591fad86" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "38dd99e729075fc31c1013882ba69c50" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.38.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f8125b6314383c8ed6c47966d92632d8" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.37.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.37.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.37.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.37.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.38.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "d1ee4b7e370ecea81645b52ac4bc713e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.38.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "8ea1af26e5c68b71aab2e42b61394a87" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "44090d401acf8796c78359475002a704" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.39.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "42fa7de06043cc49ff3e0b234a5ed40f" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.38.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.38.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.38.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.38.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.38.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.38.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.39.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "abf50d18a699519ae9ffa643c00e894b" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.39.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "b8655dff40941641c56e4accfb3801e6" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "186a8808fbab298e1090750e54222f32" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "96889b9c475223619d7514f55ef9df72" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 32870400, + "records": [ + { + "name": "transformer.h.39.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.39.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.39.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.39.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.39.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.39.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32839680 + } + ], + "md5sum": "335c2c770ef99da13a11bdc59e389087" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.4.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "490eda10b06c0404a0e31cbc1a6de754" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "da6da39bbfdbf4e3d20673d000570662" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.5.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2bfc228f3eb02dbcc948a4ea5aed8da9" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 32849920, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.4.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.4.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24053760 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32819200 + } + ], + "md5sum": "18c52e7d8383b663da1178008b601ebd" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.5.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "3eef47186928c5f7abeeb78e6821339c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "a558a3d56fb45775daf4c8b9ef6e5603" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f4139cd0409b2cd37ee064c78c0f34cf" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 32860160, + "records": [ + { + "name": "transformer.h.5.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.5.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.5.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 15360 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30720, + "byteOffset": 32829440 + } + ], + "md5sum": "c91a62585c1792f52555e74075479d61" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 35061760, + "records": [ + { + "name": "transformer.h.6.mlp.c_proj.q_weight", + "shape": [ + 5120, + 1712 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35061760, + "byteOffset": 0 + } + ], + "md5sum": "34e32044b39fde438ec4f57083d841c2" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70123520, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27392, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70123520, + "byteOffset": 0 + } + ], + "md5sum": "d2fffe239acd9ec6fa731dbfef9b6300" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 32839680, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.attn.c_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "transformer.h.6.attn.c_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "transformer.h.6.mlp.c_proj.q_scale", + "shape": [ + 5120, + 428 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4382720, + "byteOffset": 19681280 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27392, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8765440, + "byteOffset": 24064000 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32829440 + } + ], + "md5sum": "d9df0d9beaa754df73105d78d630bf9d" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..39d073e38e10c99812bdd47a08961719944b6dd8 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a48895c281508ec21727808ba1f307e8eb5bde0875fe01a54f0d264052c9a8 +size 389283840 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c5a5d57e38d2829590e12c1b752c83d861fcb08 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50159c63a326781d1fa0e459d80cb1f4641edec57dc947d5f78d039a103775e7 +size 48660480 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac93406beb41a3ef85fd8a14fbc3b6ed719fbd22 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391784dd8c468e8a9086984137d8d2047ac0066a68aab3ad3e43d29054d46ec6 +size 70123520 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d47a81ebd5f7f6e256814fead8d30fcfbb8fbfa --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a795019e0148f9efae23eefb7f4e6a805ff5bd1b108d414d80d5e264af8c1b +size 32860160 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebdd1751625e415437f8405375215f98888558d6 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79079fe33f5aa068d07b2320cc11982610057f3debc735d42884ec422df762c8 +size 35061760 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..d717251db5102ec351c3f5d556035526a94f5822 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c907ed6e1e4d970871418ccaec19b1504a5ef00984b382a238bf5a992453acd +size 70123520 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a6ffd27afd29974274a68889aa1ae63a455b219 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd6a1580cfe396222808ff0647dff070c1bf8b38235baabdf60217d2a7ae815 +size 39321600 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..f329789be49ef8989b3f266b6f8da7e2f2cf06e0 --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fae843f1dee2c78719047e2ce9ea6ef6730e1776b30a99ef3e4627b67dfa17 +size 32870400 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1292ab3e9e5b16b8ae5dd03d9250744c33e92ee --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f0f03bce8ab1a846ac89f3637459eee6a0484e5eea544ce351846a6b24e488 +size 35061760 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..750fa28b44a18595e7e251b0fb2d068a054651b7 --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4f7dd8c1470bff08d986a284d5333f4fca6b9bb40f4e091f168c71d7855f0f +size 70123520 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..4342cff3a54c76f79e20b9dcf00542e676390d40 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39eab938e804d11ef51edbd246d56b617438ec881ec1ab8ed9fbf2a997e1b2dc +size 39321600 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac9cfc961d18e3f7abae3dc803af0ad6b9534371 --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fccef7e6a79c4c42e64ec94342a0bf38f90026d7f0f9914e59cbcf640e8351 +size 32849920 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..a20ee7d02053569e1a74f5ccf89b57b9f6c67624 --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bef96e71a0914516ea08c9cc51a24ce228e1c64864cd71242cc9318603c264f +size 35061760 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c34c8c211cb247a5233b101e795999e3eada04a --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e34f87ab91d179af72195e0153bfef6c05c3b721892ec263cd07de3ef60651d +size 39321600 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..0359ba526516f714a2deacd1e8f9a0d1c5df2d57 --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ca932c0b6c995b4e3492c97820fc83372eb6d25ee91f0d1dd9b55318f7253b +size 70123520 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6fd8330ffda96bbbcc9edd06281d98a87100c9b --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6824b7fd29de741163fb35c3f1b2c5dc4e99cc8cb0af62d3d4bf66eead3ba4d5 +size 39321600 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9e8d5daa9f4903b5c468137c0235c6adfb91e15 --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3142378822871a103742b23ed69ff77791b53fcc13c33ac69404b97296cca6c6 +size 32860160 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..70e07ef111e919489d04690a42cb94333842f964 --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f8f5ca868251c28d5314664ac427686475b81e04527e10b774bda9d8cb79fd +size 35061760 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ca68851ecf536b088428dee183b7b16a4b5a445 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a584008d6cac11a93e02ae5ffd2d15e7981c9521350e442700d0ac8b76e1b45f +size 70123520 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..065c4fde3ea2b79b3d652f435161dacd2e3e2dd8 --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cdc33ab42621310ef8b9eadaee8b2a821276866f675f9e441cbc7fae35cc3d8 +size 39321600 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe2ed1426a7846d1fcdc3ad6a8c8e8fa84f4f775 --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072959f5b6bd28a017b9753389d5bf26664dbebd87719504efd36d57895ca8be +size 32870400 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d517319d5a35bf0757f88d013b198ba7925bb33 --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf9895f1255291e0f1b68d2298ca3bc5c923f86a93c102e624a684518fe01e9 +size 35061760 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..9815a16baaccba073089a5adff277cbc311feae5 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a565958659adb6bc9226a4d3513319ddf11576ec54c79c84db461e577d46e49c +size 70123520 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad659ba07260cf047646b9d0cf6c75663018025b --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659a9f6695ee1286093dafe0ad8028b703fbd418625d525ba516e2ad40555b3d +size 39321600 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0a055f64d87daa2b3e088ed279843ce2dd12892 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac06003c5096d634b0d887bb85a47517397435c6b80fd59aa52aadf78c1d558 +size 32860160 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f9020ec8f9fda9b2b263593dddb4fac6c957a60 --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e472b7a5883b1dd8893e275770d07756eeda7b328ab76f2d3f34398ccc0c2b +size 32849920 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d00d0c7029105f1ac22f5c589aee0ee19029b19 --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296610d97bf097c5ef36843d76694fdd4392e57e33b2ffb96a120768ce1fce3b +size 35061760 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ca4ad421c4a7c5b6fe96840bcf108f884375230 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b81e9129a716de105d7906b5868aff8343dcdd288710ec91392a40318c290279 +size 70123520 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..a017cfb3e209081f39d08e761f6945f7a429bde2 --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d8f04fb28b54b18718887fa9374cb46f658dd0919b2f6e14580f4e2635f8be +size 39321600 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..25e9ec8897baa6938a0a667a3eb21d6c897d9072 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1c2ea5f746496cbb1f074b9d9389e97341f2d81f29d464546fa0a594577f9b +size 32860160 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc6f19f9a08327bfd61bb09a64a1f910f96c6764 --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93ee7fa50b5987d865c79d05accf1090dd0c7059f15607e6f0ff4e2834aa275 +size 35061760 diff --git a/params_shard_126.bin b/params_shard_126.bin new file mode 100644 index 0000000000000000000000000000000000000000..39c7344512db8ff814f50b91cc8a95d9d192fdc8 --- /dev/null +++ b/params_shard_126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cb962255c11a6e06ea35a37ba37c483cecf1c8d53357470c8d523a1c6d4199 +size 70123520 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..c7a931c6a66604ec089bf9a6431eed32c7beae24 --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e8d77640908ba791cfac734b2314e27ad1632db377108beac50973da4860e1 +size 39321600 diff --git a/params_shard_128.bin b/params_shard_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..653351e02c0cdf2f524c8a96853512228e5feeed --- /dev/null +++ b/params_shard_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5466e4173f15e7138d4d701fa36758407e5bd119d49f95c0aa54d2f3c132b1 +size 32870400 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..51314bec3550ea63f8049a8ab19c8a02d02c0da6 --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ff1087484a91a9766afe5ee5b2ccd5a5c0672cf690597bcfa6ec0607bcaf54 +size 35061760 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..2dec01afc2be138809fbbaae19da6d1e1b357a90 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e1d85bbcf52ecabe749231357834f24a430f46d1bdb4a6b5cd1dd67ff91082 +size 35061760 diff --git a/params_shard_130.bin b/params_shard_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..6900c76389779ccd1d5ce7701541cdb339cb733c --- /dev/null +++ b/params_shard_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e5b128f8b841b75d1fc3e553b8001cbe210ada029510685c60f9c9fed3e172 +size 70123520 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ec6f4abbbf50e7397aaa407bea609d499a5000c --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592cd65b801420538b563480b32524df0f2b5b239418d6e53592a2fd116760d9 +size 39321600 diff --git a/params_shard_132.bin b/params_shard_132.bin new file mode 100644 index 0000000000000000000000000000000000000000..bce82761c79d52bcf6d63c563b7383f8e4e42f50 --- /dev/null +++ b/params_shard_132.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:045abf478b6ae550fd9e36dc8825d9591c43da34bec06bdd5005ed238f6d1a60 +size 32849920 diff --git a/params_shard_133.bin b/params_shard_133.bin new file mode 100644 index 0000000000000000000000000000000000000000..59f3f0f5079239c91c03f8612a1735aaf75d33a5 --- /dev/null +++ b/params_shard_133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83491e30f8b200edeb9e920b0e00e33b9a9101980f57a1a6006eec98b97fd67 +size 35061760 diff --git a/params_shard_134.bin b/params_shard_134.bin new file mode 100644 index 0000000000000000000000000000000000000000..d79f8813c0cc1ec956a8dfa06c46cd1183f088cd --- /dev/null +++ b/params_shard_134.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd4e69e10af86ad7d4134c386cd9a5d0273bffb55e6c80707531319ecdecc12 +size 70123520 diff --git a/params_shard_135.bin b/params_shard_135.bin new file mode 100644 index 0000000000000000000000000000000000000000..11907855c82d0053632c93327c7ecb89cb1c7c22 --- /dev/null +++ b/params_shard_135.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19eae9da8d13cf6fea416883567102b464e88f906e62d58d5c9423d0e2dcede0 +size 39321600 diff --git a/params_shard_136.bin b/params_shard_136.bin new file mode 100644 index 0000000000000000000000000000000000000000..6209ae66360a129b221db56c6df30d37a9333642 --- /dev/null +++ b/params_shard_136.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ef2774069c7cd1166e6329168383bba65e398c861c06dea0791ef2c144069a +size 32860160 diff --git a/params_shard_137.bin b/params_shard_137.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8aedc79d6b5606757ae801d6fa9fc8e5ba77949 --- /dev/null +++ b/params_shard_137.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c2c5d2dde94c4661826f719ccae168fda3e036cde89568c49d6d349e4ae3e3 +size 35061760 diff --git a/params_shard_138.bin b/params_shard_138.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3117322c80d5f58eedca661f8ae4d2ac37b5103 --- /dev/null +++ b/params_shard_138.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4aa7d38f324233ab4fcef39aa5a4ec038149552e61e19e86ca4931c8a5424e5 +size 70123520 diff --git a/params_shard_139.bin b/params_shard_139.bin new file mode 100644 index 0000000000000000000000000000000000000000..07f2c77b1e26cf8a886c460824d46376a0f63fef --- /dev/null +++ b/params_shard_139.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15333a3f1ba93a89ee57ff6e02c0b420bcc56cff6e848e4dcfabbec2a948ad5f +size 39321600 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..edd833f57c995ea3cfbe75902d19a72c42cbf7ce --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3887e9a7cdd3dbd70fdd79eed4d30ff927f44b535885621c174557171b05b0b9 +size 70123520 diff --git a/params_shard_140.bin b/params_shard_140.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdcd9ede2e351ea421145ad5b91f83ee770c6c36 --- /dev/null +++ b/params_shard_140.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc0a3f5155c35badbde3aea0784a3267eaebc24cd400480d151f0d8c6fed44a +size 32870400 diff --git a/params_shard_141.bin b/params_shard_141.bin new file mode 100644 index 0000000000000000000000000000000000000000..36fc09896b826d1b61963d449bc41c714d770a18 --- /dev/null +++ b/params_shard_141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d01de9b75ba1e2543d733474e8b665e2ce971af4ff2e8b7204ece221f515f9f +size 35061760 diff --git a/params_shard_142.bin b/params_shard_142.bin new file mode 100644 index 0000000000000000000000000000000000000000..971ae3b3199276474c8d8c3329ec1e6ee1993412 --- /dev/null +++ b/params_shard_142.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3b2298a253bcb217478f0f7ac29fd275c3c51532d326a2203a5f9a4271f1cf +size 70123520 diff --git a/params_shard_143.bin b/params_shard_143.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e89f4a69999b8c4aa04df4a3c5475c87638ff7f --- /dev/null +++ b/params_shard_143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b35a3bad95f5f236cb4a73c84f3d8e843ad024da50265092c2d541f418ee218 +size 39321600 diff --git a/params_shard_144.bin b/params_shard_144.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e6b31fa701f8e2bcc6c6a17fa729bb239138628 --- /dev/null +++ b/params_shard_144.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:573e74d9b0a4b1678ab3603d1b8bc9aedd87f6847df9822e56d6736240b142ee +size 32849920 diff --git a/params_shard_145.bin b/params_shard_145.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f5af12c3efb0edb2d53c723c5f9644723292a43 --- /dev/null +++ b/params_shard_145.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4818f1291fd0fb4c5689d83568efea8b3fd8e0738be6da8e69c906340231e957 +size 35061760 diff --git a/params_shard_146.bin b/params_shard_146.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab0f280b6dd669ed4235b41d8d0292e0ef40ce3b --- /dev/null +++ b/params_shard_146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09c147d8c34b031ef1c1e0ab5c7b3fda8ba3fddbcc735eb04adc2239546e1df +size 70123520 diff --git a/params_shard_147.bin b/params_shard_147.bin new file mode 100644 index 0000000000000000000000000000000000000000..1416583a10a394a4d6afae95093b980f69924366 --- /dev/null +++ b/params_shard_147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6278827355c012e44407dbdb95273fbba057002859ab60e47aedc23191b64e +size 39321600 diff --git a/params_shard_148.bin b/params_shard_148.bin new file mode 100644 index 0000000000000000000000000000000000000000..a50ee279d1036ab45d9e9cc09bfc85418d4d1088 --- /dev/null +++ b/params_shard_148.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb31579178cd8455d0cb2a9f8ecc69c5a75fb61f135fda47fd7dcab777f258d +size 32860160 diff --git a/params_shard_149.bin b/params_shard_149.bin new file mode 100644 index 0000000000000000000000000000000000000000..93e72c246e8789740a098de4132009d6a2fc7dad --- /dev/null +++ b/params_shard_149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64872984f4e2b4d516318a3a9191048a01d4f6e334c18ea1df49c052aca65fb +size 35061760 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..5694e1a4f41ebcbcc433cb668fa45e5325809912 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6821c0e101ee183645962dbd5f7a4ed670a5ed66a83d0eaf7cf13a1e79f1f40a +size 39321600 diff --git a/params_shard_150.bin b/params_shard_150.bin new file mode 100644 index 0000000000000000000000000000000000000000..6bef4bf4142e9666207ac8dc77d4f0d096520b61 --- /dev/null +++ b/params_shard_150.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2234f8544b26f21ce6388e3ed37535b45d2a8838e2fd4f6bc23aaa589394ff +size 70123520 diff --git a/params_shard_151.bin b/params_shard_151.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b9c7b6850df9211023b7cf5667bd14488d075b6 --- /dev/null +++ b/params_shard_151.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1759c9b370da05818d5735003fb3256e3de271cdf0d47bd1359a821eafadfcf +size 39321600 diff --git a/params_shard_152.bin b/params_shard_152.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc280060726285c33628be22af4f7931abd67cad --- /dev/null +++ b/params_shard_152.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d489dd6aa5cd26705eae436947da6d330f5402a93cef8e9c7c831dbd3b0f68b +size 32870400 diff --git a/params_shard_153.bin b/params_shard_153.bin new file mode 100644 index 0000000000000000000000000000000000000000..142e65de635f81b6463112d7cb028fbaff2b7a19 --- /dev/null +++ b/params_shard_153.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec06665eff6f3a55f173fc6a77afa44875c0f00edf77d1c4dabf847aeb915dc +size 35061760 diff --git a/params_shard_154.bin b/params_shard_154.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6af2aaf4613604e5a61d494c428fba55cb6f2d1 --- /dev/null +++ b/params_shard_154.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e21145c7a3e9927f36f94eb0b36540279899c20d9dd7575989f2a3d1a4ccd552 +size 70123520 diff --git a/params_shard_155.bin b/params_shard_155.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ce41477b66a0b94e361ad0951c1268d0c66e8a3 --- /dev/null +++ b/params_shard_155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e08c99bdaf8d9ff9aa3ccd09d8ff43cc408c9b2ea3fbd29e50de9dc096652f0 +size 39321600 diff --git a/params_shard_156.bin b/params_shard_156.bin new file mode 100644 index 0000000000000000000000000000000000000000..588696ea6bd880e3b16ec785646da97346246eaf --- /dev/null +++ b/params_shard_156.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899ecb55f8fd7a295ec7e08cd50963e9b2466dffb33e2dd7e87efb1afe8df085 +size 32849920 diff --git a/params_shard_157.bin b/params_shard_157.bin new file mode 100644 index 0000000000000000000000000000000000000000..323757fe30a22ad5084d0537cfd3f98c1de5c5db --- /dev/null +++ b/params_shard_157.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:605fd65c50a3d1a7b065e5f996541ca1c1a8ca19d796cb092e05db63d38f6302 +size 35061760 diff --git a/params_shard_158.bin b/params_shard_158.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdf37dc3d3b97eb58071ed37d3896c9fe3e31cab --- /dev/null +++ b/params_shard_158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd8853ac2a5ac3e2c9ec4429823a67aba985da2db0e1490e18a3077f9fdb663 +size 70123520 diff --git a/params_shard_159.bin b/params_shard_159.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e4345c43c3d15dc1df63c4ae04b139a42b54942 --- /dev/null +++ b/params_shard_159.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58a240a9e5383adadc96bf6f16568521411080d4f81122f37c6c57e99a7b8d8 +size 39321600 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..796e7a38379535d1d8fdfff840a11ee817950cbc --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9254169e45911c506d7831c3c51ea69c0df85f6ee79373d35ddf6c27f476a623 +size 32860160 diff --git a/params_shard_160.bin b/params_shard_160.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d99ec9ece1061e522c1a19a69af12c58dd96973 --- /dev/null +++ b/params_shard_160.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7391ba609a0149ba3e37841f925049bee72285348ca7162525b2af042de834 +size 32860160 diff --git a/params_shard_161.bin b/params_shard_161.bin new file mode 100644 index 0000000000000000000000000000000000000000..be0fed7d3bc453b90c430ae3328039f6df7aa9d5 --- /dev/null +++ b/params_shard_161.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f54095e8b67cddc62cac88669bb016a0a5d013139898831a40b7f3696d6e499 +size 35061760 diff --git a/params_shard_162.bin b/params_shard_162.bin new file mode 100644 index 0000000000000000000000000000000000000000..02ebceb405c85bbe01e2b762f32116bb8b28a92d --- /dev/null +++ b/params_shard_162.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610ee1272dcd1e65f8902594487c216bfd266d396336c587073a349e09d89d5c +size 70123520 diff --git a/params_shard_163.bin b/params_shard_163.bin new file mode 100644 index 0000000000000000000000000000000000000000..845f2e5157b0078816353ebf1ffcca1524ae3e12 --- /dev/null +++ b/params_shard_163.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c0d62f14eb63a58296b9e58a86b885370bea6fd3e1305e275161d3c6bd6dce +size 32839680 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..77248e2a527424ace3f5e3405ed649b693a21503 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:672214d00a7094a0c7e87b0bfb39466603160def55a4de92d7cdc87067318938 +size 35061760 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..35d7fc942c6fa4338f69829f0d0d57223988c052 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396001154536faa3d29208b8292d79af160225e8b8811ddab2c9a83f1a0317a5 +size 70123520 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..c183e28dec6368a0366fd9c33efca93295e48926 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04785e2072a0d65990d07df372ec3afbe23f3608a500a52a4dc0f81d41f099e +size 39321600 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..b82bac248e0bb9d412f5b7f3afe21585d2609af5 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc07b5ea0d7f2915336d5f7a57cc4080ba6aef390e7f07dc64b9cc5b0c432292 +size 39321600 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef496b1e312d7fe914e466565e461994e454de06 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef3979a26a85554f0797df2940e5360d99bbf2afe26a47d0267682de59a7d6c +size 32870400 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..f50f609faa3074f0431327d70bcf6366698f2f76 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e00f803b47ea889b33fd1048a58879497e39c8e3514b9c4fc5d654388c3d44 +size 35061760 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..62d5c1ffd1e3d1474f9ca5a23faf7a334adc18fd --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df02f43bea8f236d07029e651bc895062fc8be1716f73d647f56a693fb95179e +size 70123520 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..b13d782a012c153fbb0b2cdafd6c8ee60171ae75 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de35dcfe870f1b5a8ecc44d58989b54abcddfaba062be1f253b10b2c19a506d6 +size 39321600 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..545f73ccc5dc0a59beee78af74150c1c6db38da8 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43509570e0cb4955935867c601d44dac32ccbf7471199c0a4bf5d80e228a2bf +size 32849920 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..05605f5b0460c617bce5498158d76bdf383be93d --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b21e07165e5e52853ed141a09db7c3267f4748db9ed6763e32e344b17da3f5 +size 35061760 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..c66eca7cbb136c30d2641cbc49ec50015355900a --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a23dd6c8dda6960271beba2553a3357c221e15b21d4157ed342f7420a0ffcb8 +size 70123520 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e35a08693bbf35e7ef53452bd3201dae6be42b6 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0ddbd8b80560e698656e97a3a6075f3dfbd9ac7846b9b8a8a564f5a58c52b +size 39321600 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8d76d1187fdf12488f6f7708b19ff38621adb56 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cac42a67c7f5c2e6888c2e53d2aa90f9acf4f2ee3d0ffb92104126384ad27f +size 32860160 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..eae7639b43c33b0c981d9786a48a80c0d6977d09 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a366c7f3caa2b8394e299c13bd89da36c411afead5b1c0a424d8b6c2c01f2e72 +size 35061760 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..b9499225d9123691aca7d89d2f6e30c83b20a4d6 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe57d3c654dbf69d12a5950d35ba4855df2cee0cf33307f446dc74aa25f39d5 +size 70123520 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9b029df89c640bf30d35a577012e5949b75cf6c --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600d8ba6ecf674ae33b9a578c600f872f7a3ed233af188115e3cb0b9f1f997fc +size 70123520 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..50e3fbb188dcd5f59ab50277063a780f4ab22d5f --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc08b78600963ca67d22a1857656b28a63832662094fd0768b6451cc854ce9a6 +size 39321600 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e3de0957fe99adbc24fb0a6e366e19ff8e9d674 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:209b02fb89a1f9b2f16633a7c9102c5fdef9efb5a8f47da070a1405ccc8262da +size 32880640 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..1aa48a8b74ee4eea502c8afa42d2a20b2a0fe654 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff9fabc6f7c1244bf08b41070a04836bd66f1e6bd32082e3fe128139a51e020 +size 35061760 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f82e405eda3d653ffc448a2c06abedd4cc3a877 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ab6f6b86ef05c4bda458a57b4136728f866027d5902eb657a640b9973b7c84 +size 70123520 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..66cf365b282c936df8974502ea2d9b8be1c9597e --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d5c3af698140821e6e9c7376c1fcda0029e6252a3a960b734c03df1db6d239 +size 39321600 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..356da774fe6deffff91c2353dd4297afcedecc04 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189a3ff9c14380ebcd226a337854381011116f7d38620922127f4fb36c9b0430 +size 32849920 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3eb0433e0b272e9c934245f8aa0c0d316f07a79 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beffd39b6d25d61f0f8edc11dc3819b597e900c5ba6acc78645831f941a92bc0 +size 35061760 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf6814ead3a5d744125c64ec1c7b0bd08c0457b0 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea494ba442876b8c2dbed28d1c4896ae5454472b2acb3671abbbb316d838e95 +size 70123520 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..34d7fcb68b7c915b2c895dc168294a33e33a043c --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cd33f97d1cf8a565bf5023b075363061875643bd36425177b89f0943e467bd +size 39321600 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..6db99ca81e156ded20fa851737b2dea5323301aa --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb594a66be2f2cf2d483585ac3f253a14bcdb49c04c689752ecf10b70246abf +size 389283840 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..5680fdf2026b8b251ab40079e3c2f6077ce96f1c --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeff9abbe7d000174e46bac11877fe7d31905fa220a1728f10f394bdc3b8e184 +size 32860160 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..1eac4ca1619c5590d21729c8eb27a605d3db2310 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c714cc32a3f15411cfa3c67c8c85157a86cd158713e27497adcdc674eef7cb15 +size 35061760 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0dec6bf2d4d882fa2501b601f08793cacba0482 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bad8adf50cc0a62b1c1a8e62d364ffcb69a0724f80be0a4ccb332487361689 +size 70123520 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..498b72ee969510bd5cb5611166900c3e8524ab13 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a1b56daf92f19fbcec621983bd8b8371d8d19460d19821533b63e9d23d2b04 +size 39321600 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..213e75a4bd7cc222fcd31420dff58290dd840f15 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a221a6a89d99748149bed7e9b8b0dc801332ec4fc3e17dbfa05e26529617e4 +size 32860160 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..7cf4ee5f234958ecaa3e23101252eaef8da7b05f --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5deaa31c97704fc776b9e7daa08b30737ad4bbc83378b21203a77845f0512bd5 +size 35061760 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..bde563757aa6e1d5fa8394df0a2b68a77ae959a0 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ccbcac6d270d21a812c36aceb33ef29983ad90c2234c4604d472a0ec0677e04 +size 70123520 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..74473dc1ce990bda7416bb1dd8aa3c35425a1648 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b496b27a34ef9768f634079170d65aa625bb77faca13154fda8ed0a1221f50c +size 39321600 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5047a67c4db74b30aba41f4f6d90121b678b145 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf3967135d7b4fd187e3e43f1cc6c0a84ef3aedbeab294873a6b237d1252c35 +size 32849920 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..83cbc5803952abbc658f982eef453c6fa2aab4b4 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8058648654f0cdfa29ee0f815f0191b3e7deb5f5af58522c1ab9c35ed834415 +size 35061760 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..8ec1b5b5b3a0a20a1fde2e44ca701d5d25ac9340 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113289db44ee30cd952650aec8d39b7ea4cef94b1bcd1cfeb9960e2b602dd91b +size 48660480 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..9094a156aa27cb4810369d0a9903520790b88806 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0954fa74125d34bc48209840fa30c710673815920061f27ef7b26553ba831298 +size 70123520 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..45e8cec3d894a87ff0d0ebd02b0fce4583af9e0c --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b26c6733dd426383dbfba5ea239c2132136e30f1a32799fc11ac3ad842e839b +size 39321600 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..90650fdaefe59fdfa8bd6d2b49b2e7bb97cb1068 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cd1d7858e05481a8fe86a295c5ea2d57664f9e61ede295c2f438ea2be447986 +size 32860160 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc1530527059a53fec91e013dfe03ec62d14861c --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c3189fa6b6d1b9462bb70f4fce7e578cb3e7abc0e16b28d3d627a30e5cc5d3 +size 35061760 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..44b4ca8ae318b7e709e3a07db26b9610a95f6194 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f7b15a10decfc342ae1c07a2139e1899401a085717ea0bcd708f43cb341bf1 +size 70123520 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f1b597a6371a72b00689877e4525ea9a5fb6ef8 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9496aaa5aaed1b8f2ee1355ce38d3a41538d5e2e79f68530dcc3f8d35cca6db3 +size 39321600 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..14e4b2cc1e3e731ac480107195b5ab1c5c33e38c --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2976a805f8a172ec1282ca146eadb59c6cf9961191c1a84d6e2125dfc3461d4a +size 32870400 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..306de9b11e9554df96f48af5872204f6f0bd93f8 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1519de3728c2a0d6975cf577133d2e90063f71b768f9005c5e3b0ebbc9b360bb +size 35061760 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..062731bf1b580232437f89410b8d8c1ee9a89666 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6000035d190b8d7f55546b0857dbfdf5ecbfdf683896bf9690a87a279466db +size 70123520 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7629f7f62524ea28efd2f48d4bc310d4b08f2f2 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf37c7a16a00d7670a6a14b7a6cd672fd41744cab364131082c444daaec2ce39 +size 39321600 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..6beff0b9c2ca11f98df8a7034cd509de5ccd8952 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ed222ad22981da38674453e6d2045ce2b9d43c49afbb8c82342d896485ea46 +size 35061760 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..562dd897a8515b5d62f1e8b4fade3539f49a84b7 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81b8f4334b3af3a01d9b91e2d62a01d304af01a91f7ea8c8392a599e2f2f5c4 +size 32849920 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..d354a5c103985c9b5bcd83b15c667257c2792786 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd321229ee1468f86f4644ae70c6ba6acd29df723eb643e8b2094f156cf6b75b +size 35061760 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..74e82ba49feed128b858997060e8e74ba059045c --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba8d914399ecf6e46229e10454967f4238c8a4ffa37ddef6b854332f0e303b6 +size 70123520 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..564098ea961a65811836a3a2462ac58424294862 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc3610c190a981cb790cf33abd91ddea8976f055e518fb0c4e3c2e9f71bf144 +size 39321600 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..f664dcd54a16fd26ca1425d949a1558a5eb8f022 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95d1a03b408da8ca7651f940b37f389520456cc1f5604eef46ad1a413d0eb30 +size 32860160 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..86b183faba49ccaad8dff296dcd05cbbee8ce597 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d080e76aa908172c2565a39fce8e01888a6d6b29011db17be398b5add05d3154 +size 35061760 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..42a4baa3cf73adf74894e9f6d3924d34a152b09a --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048344d7c6881b96f4d7984af0a7f3a8265fe201d1a64be89d677eddaaf876e6 +size 70123520 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02e10fdc8a870d670f5895b6cb50c13dac285f5 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab26266bf334f53e140bb41ab088b5d3a18ff58ce99ce5aecf25a8890f36559 +size 39321600 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..4cf2ea3f35750c330aa701bbce0c9e9008e78a9d --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b5a9feea4c1ee8e588d494925dff624589137c6740f9934111683e9ca4d63e +size 32870400 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4f91f55c1d1e15abc6c464b2c6bec6db45874d2 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01eda7d0077967162f294821ecd7b6dd3f6cce0117711cfcea6e7391d818b99 +size 35061760 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c9068c04da4d601fafaf940b5a4599da7d8de67 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5958abdf299fe278652f2669f202d8d66135257f87b5e3f2cd9a4a7c76d4c702 +size 39321600 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..46b4ba3bf95638db3e57bde1a90cc26d858ed213 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1b887b65a686a8bbc23900dba070321f08b46ca80a01a65223e24a161a94f0 +size 70123520 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa3e633aed12678e55259f0d32d514d3ba35e7ce --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:548ef072976c1b5d918ad654e164bf634635468f7b6f30cb0fdc60e2df463ef6 +size 39321600 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..9569a8f9f49973baeea9a64f4e979bddf18cab7e --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdad02b0cf2a6fb672d024ddd6a200358cd34ff2def01fffe0e68a6df93dd81 +size 32849920 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..809078e3867a9976f1309ebc804e7ef911991fd2 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9883b95d4535d2f5fd44baf31672a1fd0d2b34edd4db497648dacb1a3a2a954 +size 35061760 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..e68a8d30bb3a53bcf424c304d5101ccbf35838b3 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3e49827e3d85ea2aa704a3668f8f86a670ec2d7ed6c644beff0dc351b75108 +size 70123520 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..e301ec2ae07e16ee47f48a217aa3b69d3e5b1fde --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802d80a454972f4293ecdd9874182d0c8f43fdf1c39d0a326e5608167441e9f2 +size 39321600 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..28d788e865f11af199c6a4424c7871a47dd18c94 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f867e40ff5bd58f0129ad9dba040fb5b540e0773d51034c562adcf377c190f82 +size 32860160 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..62405d72abe5b4a854f734839b35947027a6a0b0 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04d4291a9b0ee84620a9872c1204630590451d11ac602f80bec559388ce7f5c +size 35061760 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..86af9dcfa351c8bde17f65e766c8b7b63cd0474b --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134b570d2d0970664b1e0401779ed243888eaaab1f6e4cb17e753acfa7da4b5 +size 70123520 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..d426c29e6d645c68d569486a535dcc96c8c81132 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee67cfe76cf2f94ec6aad903b194daf1a301958a01e68c1ce5ad8bd231853828 +size 39321600 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..98f5d8e1b9ce2495d42ecc40d96f20d1bc2f5833 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a83dbe73881d21197cb114a42b4e22cac9032bee8ff0d0a1ea592c3df57835 +size 32890880 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f7747b6bd67f223490385559898eb787265804a --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2c097f3cdd0583bf2c7476e546c8c4bb2ac286831ee7d50444e4eabd387c77 +size 32870400 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a6ecc39bf69a72ff877515a2eced0ec56dde238 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d319d46b27052614977bbd666d3b42dbb57f1b0d1dfb4ec31ea53ba7f6c3440a +size 35061760 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc199820cf2c9cf8b27431ba5d4f359cbb8d292d --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68059e63308228014d5003418d419ef108ff74d2a89f83f90d29c2749ddeed84 +size 70123520 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..615a2a2dd9fe35bdd1131ca744d97e91e27f00f3 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88edd2fbac7b00f6eefad83b3835e9f6ef5bdae9ee7f2ef337130b2e8eda358 +size 39321600 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..269553b7bb55fa464e1acdb8faf985ed6c9fc98c --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4132d2051b058c4d9668a32f2164d9cb9d4cc45b6f704534cc8f30895d31fca8 +size 32849920 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..58f554085fc545352fe865f385568db5e64e1d04 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9d621eddd56ddaf919f70710246571c8f869b1041962ceabcf9c61c0fea58a +size 35061760 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..71d38a0d949f9ad59150accee04321144b556fe3 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ceb1f98e71848a1f8c99697bb01e8f8e4e6731f582520322d54e661c2f13a3 +size 70123520 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9991894c4478a2e3100ef3b9325b4e82dde837c --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bacf2008fc642d07ee13a51ed68ba00d82bd31a78174017c785427836e45262 +size 39321600 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..443880b388c130fe9acdd95608bc472909fecae9 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1789d56c21fc94e61e370759966b8adc6183ff622f4ee893ff8cbb963a69d86a +size 32860160 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebb67eba974b1c8819f22f81cf066adcd49a2330 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d23b230775b7bcc5b39fa870b90800168faa973047a96942decc6d9cbfad81 +size 35061760 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ad139f90f649702383a510bec951328d25229b6 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95320cadbf6a3338bcc3edf8d9a49b4cf5694c3084b34686ada5d62d0c946a1e +size 35061760 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc316361f7b566a76219e065a090176f214e51f9 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a1bcac15975f11ee668d3386bfe723c12900f6904d7227c9bd34c58967264b +size 70123520 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c1f4f55e3ad355e49c4ba927edded9500992a09 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b4eba14a7c08a82ef63596bf08debe8ec4901b5304990da83cded9106c95596 +size 39321600 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..7cf67dc09e9e6f2dba1b9993c8edc134440d6fce --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57b877586b9bd4024b082c7ba3ff23455d86fb1b1e921a84c4f82e93626911f +size 32870400 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d338c9a973df42ab6a636841c416c4e7961db35 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9519445a4319651c34c36fdff36e0ea41db70dcfb027e583015c1c117c873722 +size 35061760 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..761404b7bb4a86b3c1f0ee0a57d326e9b55975e4 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a42149fccc25a9e492fff18dbf6bab42ea36ea6ba222e09eab7b50cd2e19cd3a +size 70123520 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea447af077c5ad36dad94318a2453fe9b20a6321 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdf9405af13d04307d0aabac235d02903668303c3151588215d1ce2e5d3e5e4e +size 39321600 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c47778da667ce4f11c891475e0e480e3cd8eccf --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f879055f8e31b922477d9d36214c512cc877ac2d649854082a58e901670b372 +size 32849920 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..9848fe52e3e474544a5c2433ef8c26cc174e8670 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a207ce6f2e3cae6fef76d678aed458ee2e72878f19d47638d1b1ccae8a9b5ee1 +size 35061760 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..9603be86f647f54e6b7ab39f09f96bc0696204a5 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d668e4fe5062615a14efd11fbcbd60be329f428756f0f6d0375329a7547c97f +size 70123520 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..f908b6b70022d2dc171eaca54a0821a66eae428b --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4632b4beeb98f60676d4a0e6bd584f7b5bad0792bf91fc77104c6a6e7a4b3745 +size 39321600 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5da38ccf69b53ce9e5d9a9b954adb196f77946af --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,11 @@ +{ + "model_max_length": 8192, + "tokenizer_class": "QWenTokenizer", + "auto_map": { + "AutoTokenizer": [ + "tokenization_qwen.QWenTokenizer", + null + ] + } +} + \ No newline at end of file