diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,7279 @@ +{ + "metadata": { + "ParamSize": 533, + "ParamBytes": 9234108416.0, + "BitsPerParam": 5.001536828453907 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "718b17ab89995acae054282c19765133" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "323bf7391cc68a49f27fdd5d9eef22d0" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a94d103570dab2f9b057fd15eb82bf10" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "9dba084c48a55f21ea9615c7a46761e0" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "75bca7695c154208d8571ec9d60f7d3b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a45a8a94fbd4a1ba16fd1dd4f870460f" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0842458e253cdb7eec380ecb49cca379" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "57ff5d2fc7f9c2405659fa9bf4ff5cc3" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33140736, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 4423680 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 4433920 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 8867840 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17715200 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17725440 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 17739776 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 20033536 + } + ], + "md5sum": "069cd5b16478f27168c9ed5dcd574454" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9869dc1f08de28c8c1e55e0489cebc8b" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6b43522337478a31c77a76f17a7d79db" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33294336, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1638400 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 1648640 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 6072320 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14919680 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14929920 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14944256 + } + ], + "md5sum": "18f439382b1ee118f2ac6e8fc86c2e80" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "aaeb24b0ef10379da938e1d3f8581fd2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ed855b9b006ebf3e82b080346e2bb2af" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "915052f8cae808457e60e06ecf45534b" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 32638976, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17049600 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21473280 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30320640 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30330880 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 30345216 + } + ], + "md5sum": "7baaa43095a3c4d2413f8feb0f015b04" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "71d48dc956bb1ec7165150a6f32914fd" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5f7725a73dc024b4637287eed3772899" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "00ae263634b7eef6d6346f5ce362e9c3" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "c21b7f72c8db473c82a717b57fbf0aa2" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33130496, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14755840 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14766080 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14780416 + } + ], + "md5sum": "460746b1c9ddda5d8b2c51992dc3c385" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "35f82d6221b93d44b6b24743b2c225ff" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "369164c1ce1e156ac82b2b5ea3e6361b" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "52c111b04aa07a0217070d64bd318290" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32638976, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17049600 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21473280 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30320640 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30330880 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 30345216 + } + ], + "md5sum": "391159aa84bfe9f85c895fa97fece8e2" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "075ca11fda2ace9a9c299e5e30f7fabb" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8a077479e77a01c5033390404e833c6a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "abc55fa83de4ff5ec10ba32e22e4dd9f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "acead9c7abd89198186f00af06227726" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f6754d29ee782fcd299a887ffc081180" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2120635450b796fbeeda269572f8983c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c7cfbe372304ec6e8920eefb971c12cf" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32450560, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19169280 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28016640 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 28026880 + } + ], + "md5sum": "ace530886709db34627cdb5ae73ba99d" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29515776, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8857600 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8871936 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 27222016 + } + ], + "md5sum": "5894f32993f061365c858e46564363ff" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d503f595d6a606de4e414d2dfa297196" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0cb12c672d9eacfa3baeaed177b71174" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "313b077f332ccd948ae84a8e30705eef" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "4172591c4c70ef65b290e20d503eb609" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a1269ea050e1f3af760508017958e2f5" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "39fb6be79ec8efb22a916a5f4b8b5430" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "38c548fcaa9609c6a2c5dbea9aab8911" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "e15b1c2ddf20f3b9356db0dddef1b9e8" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c2a0f127fb642bb3738c5db66d2e7b6d" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "103ee49177cb76817ca4f5c27479f985" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1ae377a334d3b6338a351c41d107a5d7" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "cfcfb44b6a38d3988958230cbe919b80" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "55c410146ec8e595220ecde24dc88328" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "915b6b0cf93970ed063b2ffba068d5b1" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "88cd11a5ebcea37e4cbf97d0ce41ef87" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "20065c36afa76f524d4b2387ce65e6ae" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2ebdc7b7e80cb32481672bcdc8628922" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4fb6c79d14eaf574fa31bb09587614dc" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f62cb7101c6fae216e9753d4053b781c" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "335b4495f15e4e407d0ccb7a3908e7cc" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32475136, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19169280 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 19179520 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32450560 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32460800 + } + ], + "md5sum": "51a57d4c2c4b21c66e170093297d8c17" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "efdd56cb83117ff2d88af5b716ac3a01" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "476e1bd4d476a84cbee1049283bc6afa" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "dab34a31811c0bf3e256b238404ede30" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32638976, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17049600 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21473280 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30320640 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30330880 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 30345216 + } + ], + "md5sum": "1fa851d7e376ca4a37c6bc18bfd26ebf" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4edad89860deebfdfd6f6efb38d1d704" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "049a8c23b2c65086cc2a1ace5d95b26a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5adf43c3c3873b38047a1b354f7b3e9b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "a6ad79debf390825ee1a9b99d9b7901c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1369a2fc9ce6dc76af0954a80cf27150" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3f2b651fa088465db78e58e01ac336a5" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f4bacf27fa2dacc0829547bf37f3d441" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "817d5283cddcb962ca2da640ad0c99a6" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "39b55910a11873519c8161c7471cf9da" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6a6b6d5aa198eac338c506f1c8db7cde" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b4be2cd67a702e2ee05f76a54dbb4a9b" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "8e0fe4c8db60f134597eed0e48f313d6" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8934a8cdf0d56372d874b58e4b211953" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b9f954044613f10262e8272977dcfb93" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "27831f8192df5efff9bcdfabe44c7986" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "4e73146c0ae5ad7db266cfd450e1263b" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "03915fd37e7c559eb8713ba543516261" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d1090d682cdf81e42f644f146317bd06" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "37b9184311164c8e669d22508113fa76" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b5746b1637def17a737abcd63b352c73" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "77865080587584144a5f91e2c1171ccf" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e4e9a00eab9b30be06fc043546bcc8de" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fff39629763fa2ba88692f40a0dbfab8" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32475136, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14745600 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19169280 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 19179520 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23603200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32450560 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32460800 + } + ], + "md5sum": "93197bc1af6cd4236b784fe5210caff7" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "416396660331e9abef66507d3410dbb4" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "79dbe7522db44af6682fc8c491bfbf39" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "81cfff7579b9e7e1c10456076cfeec35" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32638976, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17049600 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21473280 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30320640 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30330880 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 30345216 + } + ], + "md5sum": "9314570b4550f2b9ae901e1076c2957f" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "16ecab1003706a43fd065e7203e25659" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ef099bacf09e7430ead7ee53c29d695c" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "184a7caaf30554b0bdd43e4e45eaa218" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "f61fcb8f19ea80987c53e07bc8321226" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "293769f22e954de1c06e3f29fd5d9c85" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2c6a7564bc89e79610b25a5e6581178d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c6428193295c50aa837b50552c052e0a" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "6408e88d3872169041c69688d89cc221" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b697d887312ca39cf83be12e89884804" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bf60b46fa8e3cab8fe1071ff477b1b7a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "99c683f4b54b91bf566bf54477c52e01" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "b6a8f1da42c8c17015417c9875e98095" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "562479934acf784ff6ae7c778e2a5b0f" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ca7311f7e6c0112ba4e63b0fdc7a57f" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9ab020f7a4ffdd08cab27052d6b9030d" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "fd8cc6f8719734584317368e8a822bf8" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f37141b92ee14e780e7e653325094499" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "523a09a6b57d426d77853dcc8d0d604d" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ae9af29d14c15f2f74bb36c75edf7ad4" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "a03012d87f7266865dc5e6e8daed09e4" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9881372b9c73d4e9d41dcca72fce4d35" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "eda357db40f8c4fb392bd22b756d46b6" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a5a9da3d5240f8b22eddedc67823abab" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "c99dc2e95b6d14acdc12883eae1b7079" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2c523ed56b84313826ac17d6890d8e5d" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "987b092df789070eb1d95b5dbe21dc58" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bff51da716ffe156fbe6e32b81ea8781" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "8e4a60798a3fe8b2a0cf74cb1769de5d" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "66ddbb0830abb192d19538910f26f617" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4aece74c1f78cab4f9f9553b1cd3576e" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ff67cfbe32eea7ae77942499d2d2c9b0" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "a740893c94622680bf11a9b32114eadf" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c891de351c9782b2e88598efd2340f37" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a9bc0c53385f083bc82d5a8b572fd65f" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3d494b092eadefc7a5361c14f64d9dcb" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "b80fa4ef0c2763ea81b50f36162f9dbb" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "919e483a9add85d180b7d5adac38e810" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8038c98a14289430ac329f1836dc61dd" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d35d55651ae252b2adcbb0276c5884d1" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "8c0cbef8b286b6eafabcca28d39d8713" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f8c202b417645fd220f7d1497fcf05dd" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0a80c3f33c078bf03772f55da2ee4624" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9f2d9385ec14236dcd423137e7d72c82" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "cca9fc9054d556afe7f51d76f85f907b" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "84c4bede34fd35db551f7cfffd501ca7" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "39366777f5968abcf673f7a4018ed007" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "009bf020f5840bc4eca7c9ab6de57f9d" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "cf4f6515d5ca15cf4b15854dd25e2d92" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 33130496, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14755840 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14766080 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14780416 + } + ], + "md5sum": "1d448e5044447721761df1ffbe3c4643" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6a3d1c4855d6ec08132960b56dedbae9" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "949d047600921cb3d3c094f171701e20" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "59b36026d71b31b597529272caccad61" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 30320640, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17039360 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21463040 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30310400 + } + ], + "md5sum": "01d0f7a949de924b9790cbfaf353d021" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7b1a6db8f1d89b664066d91226abdad7" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 31645696, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13281280 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13295616 + } + ], + "md5sum": "588b8f242eb276652edad1b58dce981d" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "75ecd0b48e816bf33295da455a52f00d" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "47eba4c57c1c084eeb045a33744d339f" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "84a8a4345b826e32d9ae8a83e7514f2a" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 32638976, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17049600 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21473280 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30320640 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30330880 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 30345216 + } + ], + "md5sum": "205df9426a65d7fdeb6f85fa45776cd1" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "82ded4f3ccb4e6d5f36f5c083da750d2" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2b1029285e309dfc2c7ca378578b164f" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3f7bec756c4f9ac1b3bfeb07db7d8fe6" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "5431428b9cb110dc2b51a68aa425c63a" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3dda26ab36cdc119dfc84d2d2ba77bba" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f2bcf86a33fa862e99c660571e0933d0" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6fa3a05f347ced336495cbba974e0544" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "46108ef48121b80ac9d9f3c3a8ffa9d7" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4f3e27ecd33ff18b1e9817a5ae0f9a14" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2ae79c6d31239af078356dd7c24262e5" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bf49f282db3da04e29ceda0f8cac6074" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "1a9966501ec25e8f4e021952fbe70a65" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "30c7c125fe1b97bfc2f80cbeeb2a0036" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c396bc5cd6a80a8b8fbcb20ee5b656af" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ebcacd7f3bc3358e58302bb6d183858c" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "968dfc1e04160049650718d9e28a84fd" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "433f95ab0fc67f32308a76c61878e9f7" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a015e12d0e4c688eac76abe2600bf619" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "9213b05276df8bd335244e1bf0d3bf60" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c83f676856094b1cfbc1e3912e214c33" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "22c529812c40fe2210068cc992ff2888" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e14410965108c90e843cc058f8fb2a22" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "995cb2ec795235139229d9c1ac1f07f7" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 32475136, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14745600 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19169280 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 19179520 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23603200 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32450560 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32460800 + } + ], + "md5sum": "35c309e5830ac7d81b8607f7218883a1" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "689449f01880f6c9f1f85396cdea2c2e" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5f28ba09beb57d0727bfb55b1031df9e" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b8ec0e49c3afdea77fd05b7d42aa2615" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 32638976, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17049600 + }, + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 21473280 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30320640 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30330880 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 30345216 + } + ], + "md5sum": "c2d9e5daa2c4b71b723f0fda0b93bbc3" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6728a8818bfb5085a9031689fae0b2be" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2cb0ca634092d1d8bece5df5ec1f5d02" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "65d140a89d070c3e7b6697ff6e81322b" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "ce5a6c8160691cbe07484f900c101e55" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "557a24c541bb1c75e6dcfa50108b5ac6" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "da90c84c017b4d9598405207d1a42d68" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5a9cf503069976ead783e019ffa50ad5" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "1b10bf5599a3af4c9af6fec68c2289f6" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "916bc70ae2205034897e4eb6994da501" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e3951d8866a11028e03bb866791da0c1" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3ca62695a28461dc085c00a75b6f5b9f" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "850ad1ef088f64a54e6919dc918bdb4d" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1c1c5faf61e32b8e0b7fb1ca671b9031" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "feedb50cc03aafe7301fd1e513ab0a49" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3d048bba3d6b60d68719887b6838728f" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 30345216, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 28037120 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 28051456 + } + ], + "md5sum": "ec11d9552e584d6bea3739a1ce613f9d" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "011b4a54e197caa8bd5a8b91b19677ca" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d90618aa2eea18a36a28caabc6b57ae1" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5ae3d9baf9368a8111674342830b3aab" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 14745600, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + } + ], + "md5sum": "adee9458c64fcc1b4fdc1398f0c30055" + } + ] +} \ No newline at end of file