{ "metadata": { "ParamSize": 805, "ParamBytes": 38801278976.0, "BitsPerParam": 4.500219892981061 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131080192, "records": [ { "name": "lm_head.q_weight", "shape": [ 32002, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 131080192, "byteOffset": 0 } ], "md5sum": "d26ed7b17a405357850bfe492458002b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "78f228a0355803b75001bad1f3615463" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aafc631056791589e0a02f0ee719edf5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0295dc3014065281cee547da63245896" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a645b1b4f285d1bf6ec07b1ce6542786" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31097856, "records": [ { "name": "lm_head.q_scale", "shape": [ 32002, 256 ], "dtype": "float16", "format": "raw", "nbytes": 16385024, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 16385024 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 16401408 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 31081472 } ], "md5sum": "54ed7f3ba62f49bc2ec170bd9cf71009" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b4f4bf83095d7a3bad570455c51546f6" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "16927aa89a414429b7f2871826b7420d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9bd9b2ef1d6eaf9520c9fae25cc99169" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4706fb24cb2a3e19fdca63b852b309cb" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85a49464cba137aaf79c29be87e24f61" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 131080192, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32002, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 131080192, "byteOffset": 0 } ], "md5sum": "acc9f153143429ea3327d40c6b7d39ee" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 18907136 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "9774c7617d62714dfdd52de90a588c1a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "706c7e4a0a4ae7722bde325bf44e99ee" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed62ebe2c8aaec983b78bf034b975df8" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1f73ed4bafa982ccd70761ae4e7cf08b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "45e6e63364ba8a3c71869fab54f1f855" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31097856, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32002, 256 ], "dtype": "float16", "format": "raw", "nbytes": 16385024, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 16385024 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 16401408 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 31081472 } ], "md5sum": "6d2b92666564a70a95fe12b4e0a73fe7" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "500db063e664dd4cbd3cda2659624983" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ae29380631acaaa6477e8d52439a2e69" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "14ee0223a36f905f6fcf878c1f5ef927" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e6d1eb543d5421315e73ccce14f463a6" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5b541d9f0e9cba02e33d076758aad8c3" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "463fbb4cf1c31478d0e1e865acfbe9aa" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "3f82c0ae46f4e73ebcc4dfd614ca5c07" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "269425c132659afa782f84d98bff5d5a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "889ffa3b74ecbe7e6f2aac34c6ea16a1" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "00efa5831481fb0c65663f79e7fbe45c" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "333285d755943580e9ffcc0074fed066" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5b09c4b3e42b870c4548fe477767ff6c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ff4297d63f767d603792678aa2f430c7" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "864078217605f1dd4727edcea9c699d5" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "59439c9cb57cb2151c55443c79867b0f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 28327936 } ], "md5sum": "56033d72df0b36fb94548f4a2dc5efe3" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f7abf62261d42335ecc4859ebcf52bd" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fb8a93b5619a5eb22b61c647724ec9cf" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "013fe192e48374aa165e1d2e7bca62e3" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "22566ca491918b96705873d07686f2cf" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ad8c3593b98c6a3842978c930433edf5" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "af6d8fd591dbfa1773d6e97606fb0764" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "e338744a7ecbb0d20f567c8301e9a692" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a59300365a4e58d995c07683c83540a8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c4beadd2fa852d18400c8be687f05386" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9ad0d60cd068985a023b3578c8061b70" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e51b2a2a93b7133a8760b4e38fca294" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fdd423a3c78adf0ca917297a23a6740b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "7f9b2b5f0714a50caa0ab3e174c0bc8a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "427285d588e28d90a1ff3dd8a17a5e7a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "02c848695d44e215c7951ad43c829dc3" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3e271c232f0f572ea66c6f55f2646452" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1944a9d12c93c882afb3e8dbf8b30eb3" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1cfaf05cf5e5506c29beda74fd657bb7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "6741ad1211741c78a23462e3dfa318a0" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3d0431ddeec2225b728c75b9d6d2d739" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5e759cd3bc8788b984dbe9d1ca75429e" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c818083bfee2541c2a9e29a999b5636e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "efc7b59b67f1449db21159c8a2f5985c" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "760f54de564a4e90c9cb6fad663e5aeb" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8f7c3cef902520adc06f69336a5be7b6" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "82b400904d570f2b2353ea81234fbb3a" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8a294b1bdb4bd0d2547718ecee5bda93" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "27f91a0010796d4db3e57456a224abfb" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "28ebb99816c53b270bcf10b83d90baba" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "01dc78caed1887146aaa059b6a4936db" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "d14bd045086460d43fae2d0cc8863a0e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b1c699a8174e0e4144700c9c31113cb3" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "39b591b3c0744427cfbc7979d8c4eeb2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1457b95554a19bcf9c444fd6a12a8138" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0859959d2c5fd5b465040953603fd12" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b3cf630fae8216c5360b2a87829fc31d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "ad1f74d5fd35426f383da98378348518" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "21c4adeb457478a3662ce1fd7e55cd05" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7cb72924b345040e3345a651f2e688b4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0b878404251658b8b0c6fe742f8248be" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ec32f086bc1821d06267d02603f69319" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5bc4b9ccf30b06d846fba81ed25d0fc0" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3f6ec1aef27b5dd35f87c578aaecc7bf" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8f7eb956ee079b07a63c8c9b2e3b53c3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4709e7ea7e0d7e39924253a90e04abe5" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "d4fd3eb7ae7ac048d5f789d075beae94" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "836e1d5d5a7aeebc666a00510981a2c8" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1cb150415fd2cc128999aa1160d5c94b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "8377a664c65cf7d65f059bd3277e6881" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4b87acc820e2611e5d8c007807293e04" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dd456210738fb1e72e5d4417856cd0bc" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e3ae14f344b3594c8b0f6d1542b94709" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "88bb14dcc9c3053dbf7ffce646c1ba44" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "763eefd1cd42ace3d6032919c2695ff5" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "c1b2c382c699bff1de87496a42b9bbc3" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ac4fee7b3e58de27f36ed3d20d28e10e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ce504359e6dcb2d4460c97e79cad5716" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2c20b5bd3fd40887c659a628cd6ec22e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "09c3987a21ae169677259ee756c244c4" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b4fac1b4fc86124708edd1f47224170d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ecd14e1e59fc0863f25b397728393843" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "1beab732621cb67e50bf259f2d1be547" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1c3bca4f3e73f5d0ff26d89ab59bcef1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5aeab88ad20c1d29e61b74c28d7d4683" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dca6b9ba2aba31a47aeb4b5e5e04814f" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2f00184396b9a892aeaa5290d2e7e405" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "436fd1d30bdb92899308dea7028a4865" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f5eecc579feab92aeab772d5a889288a" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "11f853ce929ba7c215376f4f3ab673ea" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "056464ffbc1d0a29814dbdd156af10d8" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fe2ba5c2cd1f5b9d512589e00c5113f5" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "849e9df7e1f2d369c0a86ad2d3a890eb" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a29d434b25c1a31ea72a2bca6060a450" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11018734868332f426073889f395127c" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10bbf655d2861a5ae560b6d3e9230df8" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dc969cd91aabe0afc356b5f5e79c3824" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0d027abe2a5ec617e7700664c6806e60" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "85eb52e948bf56f008c58ee2dc129e4b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5fa932485a11f7e12acc1cbf39f712f6" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "9b32671eb6cac0b5f23cabda946c44a4" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "137fc36f14564249c290201ba3544f4e" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "97b21aadfe4816d2120d786a5f9fc720" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e0fec881f90f23022d7879a6af6e2013" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "47c4782d13bdf601cb85e278bf7af319" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "50aa068b5c47b046aafb003dfd7624db" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d70002c68a95061dc31fea4eaf47aef6" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9c52e797710182d4035292fa72116a47" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9a375086cc6f2e359b0dd34e0e7fdc79" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4ba98a5be107510b83d83b73ffccea32" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "073b8898b3d52ef5e5138ef6d9a2e42c" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bb78a7fecebca0bfabcacd426ae782bf" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "986535e9b84733f88f66b854778a85b4" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "77237762694f3b9eec2aa814ccca0a36" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "74e0572d5360bfbe93074eeef9cdbc02" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "004f13b2e118264f230055786f1b339b" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8ce304be8fa2ac86dfe12ac8117472e7" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5fcb96a58da05494444b3ca9b5b33d98" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cc3f97394b3372012af0db0a70077f90" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "63b0c5944de371efc13acde1a9178665" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32555008, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 28327936 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 28344320 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 32538624 } ], "md5sum": "389a9b3f0a6e694305b30db44e31fc2d" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b2b61f2cbaaa4bbdb79519d5bc6838f3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eae42d2bc925b4f9d560d85903d53488" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f73d64400c0a39eab366bd1a17fb779" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a72a876cbbb6f5d5ca33ef71dd1f1690" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a120560ab89063218bca76a5e243c238" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "8ef99352d2e77b946ab2693d07169cfd" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "390fc5972e613cd40bb3538a2ec2403b" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f8f7ec561012037e3ed89b7947dd0965" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0346e642822786878832e409c27fea11" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "96baea2936156da42e4029fdec4af14f" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "631a85e9fa71079cec155b8f41d33950" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "58cdaddb97e867c841a2dc44246b8e90" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0353eabc6269b7dd0a8804ef4f3ca448" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "69c798ad93175c48790f8b85553adf5b" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "58d10a6144db7a9e3694da9c97160f78" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e117483a7e426b8064e6b4e61c4e49c0" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a17b053cee132c6cee89152b2d399c03" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "0f3901a5625cb6cd0cd2fce76b629cf2" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "789e8bbd7c97fdfd3aeb936479d3e8bd" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6c387e8ceffabca01038e3b8d37356f9" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dbd23a58f0fee5c909cef053caea0e04" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9dc05c758c5f9b4fa08cbb19a1f9cace" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "994179f27ed9a376325d3895f11a08ad" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "820ee0b8431c786f75f3a3a0e9a922f3" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "86c5f84c64ec3e101d7d2b9dd7694cae" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "364120979e6e3852b325f9495ddd89fe" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "a482777182441979da752679b6690b7b" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6ff47161eddd77d16cf325ec1037954a" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "938a998899677d7ad1d46d1f5b781c63" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "02d063057bdb5d1bd8ad8d11ff3d85e9" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "236d0be8dbe050fd0cd8c6f090ab76ee" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1881cb7bda1e69d8def7143bf958a996" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4f4f660fb42efce723c813fe3107e40e" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "379a016278b5396904043d4781af9f08" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dabfbb780376cd17c169784078ee7102" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "b7dbd9d70d056a868fea80511cbc229a" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eff343a6cb638f27f6cee13cbdea6b6e" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2fb97c620fdc4aadabfe752d54afce96" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c6951e4aa346c64f1732c4a99c825d57" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f97efd7d2c770a1d046cf4402baa4e09" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cdb7adfe14c6807f61b4557e8447d50c" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "83c33ee97eff03008c9f1ec98c3e733d" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a4731d0486018dce84bff4b398c2953c" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8aedc66533e32181997571d069288499" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "fafe7d02a2d2372cbac5649ef2e5e220" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "db477ed45b46d1a3bcffae1d9d164f46" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "25de4d60a8b52c8150ab6de596bc99fd" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "dd6a1d1f81d44ebde175d1950193237b" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "37bf25d674d58ac6eb8458d97334c3a4" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "382bd7480e5a9854e7a3235edeecca56" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1b90e698123f909b29791ca8a164ddcb" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "233c72c36ffe87ee67fe97e4e3dc013c" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "824537fbfda5144b5cd945c11d29b73d" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4f32f401264ae5e21cca9a42644cdb5c" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d7c7a25c31f9022c61a42f2e3a15808a" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7954c29c6bbba79521604ee92a094b4e" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6cfe7fc5173006c02e69c2642b7a28d8" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dc9a60a4b95f02969d38be4884b9d944" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ae794f6bde0a8a153ded9f47d92f7866" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "284012b7251d46c59144c095f4d28b15" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "9c2801c22d158ed5d75b251c03ee7d07" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "976eefae5144d4ae9c63020a30f0ccae" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ba13f5f8c40a82285ccb7fcec1b08114" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5d9e0b602bb0c5a99718a51201c85b0b" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ac107daa7b93d33ef61e337ce6aa7113" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "2930771ed96ab4eca8a55758fb79ded1" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f2d113c33d5b1e01274b10ef5d0057ef" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fb51c55c77e4121d5fce0052897c90bd" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f7afd5da1d1eb88e8007e73a054ee14c" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ab00834efe1c2d1030d944b0be22c077" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b44576d8751b073d2787da7fa86dba58" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "10946c588e5f7942d410a4659e794734" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa9d98adfd8292523c26e6b5adf83198" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ca6104386b0b1b954042022f6abae6bf" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5caf22f1c7367a071e8f1c9c35325ff9" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "149b1ac06a6ea29da112819819dab8b0" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c23df5a348ab2c7ea4b90d86194cbc2" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "68c1f13c238fe554f6faee2b5bb6e0c4" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "b2ad1acfa772db213ed1b53989f8d27a" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bdacbf065c7a781c68301e8197a5dbb3" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "72c830281d54fa8db694cef62b869c11" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "78f75629f38e3eb2a8c4bf115cae3190" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1cfc12b72d4b420d4085f05111937c3a" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "defc60c5ae7a9c305f7d4b1f25eb0c12" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "489ffe1505cab6d97f55a506e3db87e7" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "66e48553629cf262b187c93bf557c8df" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "11c47a514a5ef6e30345c0249586d454" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b180a955267c6fa299af718ada63b58e" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2f7ca4e9ab4c3dd2eaa18f67f5b1d454" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "422b1e2e164b8d75a4f45b6e564e0b36" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "05a0145fae43cab308e294b82af6b12d" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bf7d14d693850f0e4bfb98bb53ae7b3d" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "65a0dc83006603818d83862046b4dce9" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "67bfe1157b12c6c7b0ae259b08ac3a78" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8f3a883bc3e6013a61d0ca1b6d0ad486" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "348a6789d4bd10b73562b1e1625d5e71" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3a68ae1777bc8020693476742b61e630" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "028203da4e9c688f40fa869491b0a31f" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b59b2262435a88fadcda961d37646219" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9bb0c217fd8d49205b8e91935ad8f60d" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1bf50ec2f0636cbba68ab86303614eb6" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "5e775b6be805921910395b037c23692b" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f27df18659ae0c30cfe75f9ac956d6bc" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5fe4a19461ecf5f2057047cd2ca07eb3" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8ace825149196e2d01b065f5ad401d0b" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5173f2b79d5020ebb5e527fcc8918b7b" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2d7c0386739de8ed9a989502e395d57d" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8d71c4d841f42fd1ea49eeffa519ad3a" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4c04126380aa12598bf0091626edc0e7" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56456ca0042f1b8d1168ff306bd6a258" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "5367a1fdb66c753e8cc6eeb837b000d4" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e1c6e49ce4e7fa2c9d74a8d66e4308bd" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "16c2b9e89b4b581928bc66dce940b874" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "e48a8b4386152624681e45eb128984f2" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8cfdc08e86d20d5de8370a2accb095ae" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fb327b314e88b646f4fde4bfc069e223" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b453bbec92b6d060f1adb0fe0c2cb019" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8796e05e519c89e31250cf3bd1e96b26" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c1defa04efe5e08bd32c5e0b57ad5f93" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "f61cc523a305a8ea1ff3281042065ea1" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e4ca171cd84f0a62f90e0ba0d3f12fbc" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d737d666b2c10a44cec675527a091c99" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "476e9ee83bcd97022ff4a3c2239cb440" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5d8588664c962a7261b4aca459f372ad" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eabae97215d0f21af58563aca8d2e66e" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6a6a7d92397e0c2eaab5f6a084b447a0" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "867f003a19977eddec01763dd969d57d" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7de5f493fb54ed96367d7299ed141db8" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "c20bc8c37755e4aa99d0f612340dd912" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b02bc09869d178c3f0717ba3915b945c" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "da9dec1390fe6a73e57d203be88630ae" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "bd6102ce7d3eac55d9c9a5c7c8828160" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d6e919f0a3208286783d3f342522ef5a" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e71a3eb451ab9206db7d566767099575" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2e18d5d1febe4fc6d75888599eacadab" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dd04aadd0acbc17c6668c34ae2b20380" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1c0f5815f36cb4094cee0356ac3f7dbc" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "5eeb35a1f5c74ab737dda1df59ae4e4a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "edd72c6f49354129b9667f1938a3e503" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a88c1184eb3e08d6296ca9eebbb20564" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "21d65a4acdaece2a2215be82eed87e69" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "49c720bd3aee7145ae0c02cc32c94cb3" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "76d85293cdc984d0823da6461c196f77" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "209e4a1fcdcdb1cb1347a01ac03d75b9" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "9ec0cb7be62a8788907e891cd22f34db" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b0aabd3d4eaedb128af2e031d645ada3" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "85f1658224d8eeb94cf7ac35becf3fbe" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a8c0e80fb0524f27def3af7dd280bcc4" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f7d949bfe2f81e58aa91d4dd17b8656a" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "2e686dfbda48d51153bb866543ec77e7" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e237669a32dee127d04d7aae0ac5ff86" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bea89f805c14b4861c81d3e534bbe139" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2f69d424d210f28cfbdf82fb3cbbdaf5" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c0ff2ebf50b468b09520b4d2fecc1b1b" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b7f3a9d870036127781ae48a6cc34c7d" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "52ac8782880e2fb01170dbdfcb4728ce" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0df87bfcdc007540055fa216321274c3" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3bce3f7ae3937dc8b72c35eecfc55b58" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "63e414c47f27669efb782036e04e3a65" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1d12384999b89beee9f57c162f16fe49" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ade4183c407663b32f6a5a4e419f1072" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d601e88561a20c1787f2d00d87c4efd8" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "25552b4e1796cd0c36f65d6b78e032ff" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4ca60b985413edd7efe73ac4182d8f73" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fe26110bf7b2812e3f659f12214f3819" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f9473292d267c56aba106431ff83fe67" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c29f90c0a42f3e8bbf78e87b751b6b8a" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c20dbacb1e09030b5bce25a8a7e04a97" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f7b3fb76633dfaa9035d5935d996cfd9" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "c5100eb48d27442d94c00a4488a986d5" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "805f0cff424296dae73b9553b7425a52" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "11ceaeed2fd101871cec51e2d723d8cb" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "989362d01a4dd7b8d948a93772a9726a" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fa11be64db7e41a2c4fc57b104368011" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "325deccf0bd441a6d755ab034a9a5a90" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "202f53b9ced14d065161481ecf6d0186" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "d4dbf0cb62def18efce2644f8119e285" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1cbf3c4bc2b81e16cd1faae65dc16268" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a51c11602d0e8d087e76e586b6e3c75b" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "15261172e9babb823719695dbd9312fd" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bac1bc6006318d2e7a7ff6eb926ae9bc" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "af4338bdb69333c15d3993b6c0d48e96" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "746b20f174d0f98f754723279b62eb85" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "518837be6fbe84ec3023ff8833afa136" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5627afbe4ea15a77ff73ffc0843cc6a3" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a6f62acba1ecf507a074339f25e28de5" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bedafd9749aecee003ee7453bae5ec9d" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ce077f0f92ce7f9dd395e41b9213d26" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "64d746fef55cad80d1eccb63c06dcb40" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "9e6dc060d1d4f89cd2625cf6763bee82" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "71d3701322d17771b74434e80688efdc" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c9d0c90bc4ff52fbc075ab6b3cf1f2b9" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ffcf1760efd1846e0fd03ee16c7a32ec" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fab201ade8f497d431c6387aa1e21324" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a87e4097b89f3fc502226b258e4b0034" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "80c19450f37148d97b4d764e34c01706" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "191e78dddbb25708f56d3b25c08c663c" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4868c6ef861c67e4487932eceb24553d" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "44fd1355c345abeb50c1cf044c087981" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a77338a152ac3a542bb7ae3e9b336f39" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "89346b99011530a9a7a25bde29d3a7ac" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "be81aeb78a03e8c7ae319aa30721aef5" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2921a3f0568853b4790522c04717a6bc" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "463fefd528fe4fbf9fa3334c82405d77" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "008f27449a8f6448a9111cb570afdf94" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c1c38b3dec40f4aa7b0a1670f74861b8" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b341bc2c9ae87e662af8e5294dcc54d" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "289985ee804c20a020d1e034f3252836" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3312f82099cfce715aeb74be230c7fd0" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a70aedbd310f2f98bc71de6b9e8d001c" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "60ef5647b5b19b1509b48fa1ed541551" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c236944d911cc18ef608cbff232fb326" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0d74bac1cfe8eeef7dbd5d3932cf1f01" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "819176a80e95ead0f720b1a36a68ef19" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "34d941b7d53d6c54d56cad59b5ea4678" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2d8bfcdfdfaab370c4da9632b49e153d" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e140da329683bcbb898a0e2cd6f1eee0" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "83436178134e2ef3f48ea9883ecb2d85" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5291adafdf275f56a8a3393b72ff45eb" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "6b729a335bca538bc947c86268a78cea" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "52d177d57b639e9e640fc3d1b8d5932b" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "188b8afabd264022cd4b8e86675c3527" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "887a07d475f41fad57f428354855e79e" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a8cfabced514a649890771a9da566e97" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6561943346c6cb9a5db18cd777b9d038" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d208000efd6160209470defc9b7f43c9" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7fe9bc001e2334f8f0960857dbcd55a8" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d34e2187fed670ef55ded8a9fe6d2029" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "ecf3ea174b080bb1c57c244ea5c41365" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "303fc151ccc176975e46691311b43f0d" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "79b3f4a4874825560c16b90f297d7597" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "26b142ef7883607d6a0cad7053b965d2" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "08eb098cd9228734b8b833ccd5feb168" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1242e129ab84157a5607000f23329a25" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d915ed384a078fbc69fb558ba48edb64" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a1e0e1d30b450a87c2e83a79c0709676" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b0b08fe6e5d12a0ac150fc2db96210d8" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4d0e11cd07680b0d1917aeabaf269c63" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b87eb3bf6dc823662bec36ec098e9f4a" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "75a9dcf8c7d2d18dea609fb79bd2b713" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ad942097920564878773471716c0ad17" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2b726417bf42b98d734f1241904ba030" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "44d39e01726114ec6ad137433601cf39" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "69fe1608b6eacf679f6bb07379260d51" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "d9c629430216b68fac1820ed532949c0" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e8915cc80736b4d4ea0c074715a6c5fd" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "266830cfd0998429098189cfdc624f41" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "043aad6f9876af562b21df40d8f93463" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a5ece889bf750988b08c61be9da17854" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "23ba754d68c1ab714644dee8a0b802dd" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "02db731d091840c6c69f77b2cd963bbe" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c49ffd3e3b72b60164de91080f33eda1" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f182811a05f69ee1cd489740ffd59b61" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ea13f586254e63d9b8d1b510d50da47e" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7e9d898b4ff8275a1d3193dd47cd4645" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4ec26d578d6f702ae7c395a8a2a99baf" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "38d853fb33c699dee53f0b6dfb7ec14c" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c90ad77cd34211f0d4391ca83552c8ad" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4d2bc009226bba03636e1c89675f12ce" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7805dae8fa6c7d9e7575444d0f7b5959" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "99c92c96e0dbefd3d84f04fb8c3bb714" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cb2c5a7e0edf706d849e74f7daf51255" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "10093d830f1699b6d76d62353d8a841d" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9469cec395403e34ea761491f3fc6408" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10bf9044533bde111706f63ed51d61bd" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a55040ad7d9f37aeab72a71dbe246664" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ccd4a96547ad32be9c825917706e9375" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "b6861bfaa87d89d26d4a31e363e5f61e" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "86af58194c6ccafb2e94a8dbf607883e" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ddaf230c03670673b593080b15a5ac39" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "de0d99fa256ca0dd28d00916390b65f3" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8afcb22fafad78a4a3e1b4166b9c5208" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e070a8d326d1add4c9d6e212c3613371" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4b9a908b71c97238499aae28d853bc55" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6355f2ee087f936200daa3ef4ae31cd8" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4371d00732964a1ee77bf4998a8a89a4" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "61c9ffaefed4e1f14cf3da28c37078c9" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ffe5bfb04201f9f1b40fc353b40dac4b" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "39d0fd25b8c1a1eedc596800c645660d" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "1fe1fcc72e785973e7077f0df0cb45d7" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4c3abc73b8f55c0cb44205d8fa702258" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "66fd0ff9e94d9576dfe68f0272044580" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2ecd345ed9eed12eb9a60e00ef571845" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "17340101984327988bee1c5c55f9b5db" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "211ae376b493fbbeefa94aa83ca7c529" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "1b0d5893812e53446624ebedec8c89ee" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6cc26e04498ebbda4805293529ebd9d1" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a59174f7bae2d9b573ae27e81d324539" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f6293eec6ee5d3a5d1f2aa7901f7d52" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0c2d83d0f5859e6d95c9e85e02e3943f" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bbcaf337bc58ab1307f8276c26052ca8" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "680ed7fe31f8c0a3ad1fb9c1f7dab109" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d60358738371010b094f274b73919e5b" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f0dbe948e5768cee98db080f94542f97" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "bc57b5fb08e274aebb5684932612a35e" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3898576831d24d3b4ef5a3e38523abe8" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5fe1d579888328b62cbe46a79043dda2" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "c18ea74167c91b7850904dbbdb635182" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6f33ee234ecf24e753192d4127bb7f1f" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f8d795073f8a3f0923f39243e60de9be" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "af1ceaf31ad8390c5be32fd40cb43537" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "67ffcc5cd3f4fd00d8dad6712d8d365e" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "aaa113c019ee34c2022fdd8e126824b3" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a233344fc88d9631dfcf3b3856c9ba68" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3b604653db12ce7ddbc00764ba094189" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d11b64572a2b16a9e4d948e0236d429" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "683beffba1b40e927bfeeaf7052af290" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7fa4a2d1cfaa7a27ceec674bac8335b1" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d3ba7bbeddeb2315b20cc3d4da644b63" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "41541ed033b7ad196319ce92ec1473f4" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fb61294d6907793af6fd3c1ee75484e1" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2483263f839b27c1878b90e1bf3f6789" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "0abb28eb897ec76196f10bbb7f99cb64" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8dae16dba0041d29e14403e257385770" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "612bd13f3281f11c32a9e050bc119ac6" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "a3f5000b849027bf2ddc0b31b52dfa10" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73dd1dd663b59ef848c9cc409cc25875" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "692f35b0583b7bdbce78038ee910c5f8" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dccf5ee8fbbc4d0dc0ca9077d9962e16" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6d75dbad358cf2aae453da4be2e12954" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9430d6a8e123faff99fd45f0cbe3eaec" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a6ac75b8cb46a18c2f077705ba09d2e7" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7cc7ff4828739e0fc4095ea672577565" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "750e8ef10347d342dead48fc2a93337f" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cd3f6e092b736a09ce51d73ab27fdc56" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "969d0fd3e0eede9d872317b2186fa2ee" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bd607e071da7767998ee15fba23cd621" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "78eeb04d2e4b2d3fb135319d647fc7ab" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "4af92d6d47b935a714dcee401216de76" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "def5e39fa599859749818c28df45e256" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c39873d767f15cd82134dcfb36dc0031" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e56160a7030bb0bbaa75775e8087d1b3" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "38300ac9d335a407ebfccc96418dc57a" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "730d0e1eb9d1024ce008d4cbff277f10" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f0b02345fa2f4f24c0e01c613fd8936b" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "60fa166ecd7ccd181e4c7b9c737fa804" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "17ea38d738eff0903fbd4518d6f1b3f9" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cc4c8af9e61171216af965a06d04a0b8" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "raw", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "58deb5c479d90ba6c0b40f5aec1f9bef" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "89de06719dd7ce85d6b74899b8801f3d" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6e038622b68c6d0e34a3e0a697990863" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "854cc88e2afb3bd7c53ec8c779c12c2b" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3d76d4a02616464b9bfac2b847489df5" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "32e29fab30e31e89c66eb30d6c385a56" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "raw", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "65bb3c8cd5db3ce561eac46c0a3b69e8" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "raw", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "raw", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "raw", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "raw", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "eb71c08dc01ede0e33e7e89b088985d4" } ] }