{ "metadata": { "ParamSize": 325, "ParamBytes": 3790746112.0, "BitsPerParam": 4.500454373320414 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65538048, "records": [ { "name": "lm_head.q_weight", "shape": [ 32001, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 65538048, "byteOffset": 0 } ], "md5sum": "0b3ebd70aca35d52e03c2d5018a8c969" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30744832, "records": [ { "name": "lm_head.q_scale", "shape": [ 32001, 128 ], "dtype": "float16", "format": "raw", "nbytes": 8192256, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8192256 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 8200448 } ], "md5sum": "9c1bc360f0ecf22788589a67853fdc94" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "dabc232391cb802d5ae7225e191f2af5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28196864, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 2818048 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 2826240 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 2834432 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 25378816 } ], "md5sum": "af0bdc8ae4dbb34ed9b6443616b25b7b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30810112, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 5644288 } ], "md5sum": "fac5d884674e80561259a7af8498af6c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f3c03610dd66d0ecac94de161928e87a" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d55ce09904032b3e0e5b1db1f347aeae" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9c247ea57456c0bf42c2dc45b326cb50" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "f09e02b7c2a945c8ec9c75a792ec4992" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b998ca4be1e85ca59447a157d57cdf34" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3c4146adf4307c25cfdebf4cfdc638d7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "2f63d727d22e6580e733c5091ef8657b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1e40b3a7d343b395e0214a074b2bca48" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "526fa2fc34fa154ccb90c3cbc9d375d1" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9853e78ed5658d555ae2ef7e2dee1471" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "29f4025521308e187a349323225d138a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f3b4df025c50effe90ff82d7b2782d45" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9fd0051093d358059203e429380f5481" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "8bb87bd11542c42745ebf6eb6f0b161d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "41ee252c58b75257414eeeac3541c896" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a5ff5bb6c885549542a251a546b1cfd6" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7fe1f5947fb5bb379ec79ecab34ac8a7" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "bda9c0be12ec9d7df1da99fe958e08fe" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "868ff6271b69ae977d29dab70c5b0b68" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8e7173a68ba9cc5163089c5367c584ac" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "9a5bf48b8df43210b4b23352bb044c69" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "72c731a34eed54671ac7708f237f55da" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b70b24cdf30bee449e39c555bf3d8b45" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c3088ccb7ed96882e320338472a41f56" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "45110eeed5ba47df174a762b81dbcf7a" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 65538048, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32001, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 65538048, "byteOffset": 0 } ], "md5sum": "8ed02b9f78e2e5592f90392d394bafcf" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31801600, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32001, 128 ], "dtype": "float16", "format": "raw", "nbytes": 8192256, "byteOffset": 1056768 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9249024 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9257216 } ], "md5sum": "64e01f1ea8261899c565ad6360835981" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "aca70bdd5a543ee4405a02cfd78c5e85" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c0d17c957c83c1631e55889b3249e6ff" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "950eaffe8ce5c401c2b785f8189e183f" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "81649a612c582acac7fc7361aeb54a7c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1b4e3989738996821333ac9fb8e12684" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "b4d5b46de3a8412dfd786a339a135be2" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "22710b0e21eff23ea2864f7c0c6f52bd" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f71dd7dc8e8158bd06bd4ed009440007" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "daed3e318bf8c7086b4f02d7ce485ae8" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cc986aed592d5db140336b3c7ee8e848" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f57a7ab19e23241e01f267d8428b0a5c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29827072, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 21045248 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 26681344 } ], "md5sum": "69f2d4853bb9dc6c17000fac0ec9699e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "58efd9b9c02ea60f2927a91b6210aee5" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ede69d1ac98e239b39b69bf1f4755a00" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "66774010679fe3b4e37d8e55090aaffc" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ab88a106ca1130094d27253c241b21c6" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9a680e29dd6ae9de019b467bb8b5075b" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9682049cb8728e73c0d44b51e055d300" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "6e42a99181dce715434e9fe3d7e6a483" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "a265590f56bdde0992fa6bb71dc174d7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "82a3799b6312b4331da0586b1152653a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6fbf365acee1417d03f5793508fa6bde" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2ddcb52d7820022b40f829eb1618f9ad" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cbb3be1ae75782c2f42d44ce19fad1bb" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "245d5e7b4dfee527db104c1865b0e547" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "35d2b115a8422c007564e9bebbb5fcdb" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6c88ea7a9858c21d5a8abf8412112682" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e33f151d19b1a2cc04ba96d060b716b4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5ceafadf2260c427417f94d76862e7e7" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0f9e6f28731bc49ea73cc2d501e73b45" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0e478c490776773d77fbb2fc15759525" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "deef0080d44f62d4921370d3ec358771" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "83b6db7746123148a40f82ad3ca45e4c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "79a66aaa92f6e442ce2ca6f2980b6382" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "26523fa0b68ab48fc5df47bbc4621ca2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b9227427bf37b54af600038d3406298f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6cd54d882465841f565ee7c509975184" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9ab9a2ed40976e80905479a675320e9e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "25c9aa96de13018b29cd99977dc3c4f9" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "208cb28d3fc49c0523721e56bdd55ea8" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "0548debe814b50378235b0950a25fba0" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "35b302f56835854854b59180f5fdeb20" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 28196864, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 2818048 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 2826240 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 2834432 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 25378816 } ], "md5sum": "0fd67bebd928493b132931ba9b2510a6" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 30810112, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 5644288 } ], "md5sum": "26ae1730eb7d58c245392080a1a813c1" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f7b6b5f1a56df4e75d666e4b66df900b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "57299857b79cc2bd6855159876fdf8e8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "55addba4025e13bb8a00baf008683b0f" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "911f9db742526e12af92ea8db69ae330" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ae90902a05f4a3bd4679c71a499953a1" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "672bc12b8de3e4b931c2135226a0cb08" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "48db3e3d83bf002877fc456578ce0d84" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "911d424186f615e605269e4efe092fb9" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c120464ef04d29428b93298388d6f71c" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dbc27da3977b84a325d92d301386234f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "313259244061058f92ed11da88027538" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2f5ff300fbc97879b7cf65440d6ff809" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d79122042fb9e249e9953bea936e8aba" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "3f54dd363a6fcff28f8b61735722140e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "bb7bfe44f6cb854416bf724bd0c8ef1a" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d87489d490395f7b2f91cecba207a874" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "77b4ffe5fbc320b16d9012361722bc83" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "524d9257497efc82bd3d220e1126b5dc" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "16678eec83b3928def8d390a63d16e0d" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0f0327ed1513c32817900802bffff7bb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "81c675385269314d24a4d19e4fb56fb1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3a72bbf6844b7f1902b910c8c7427720" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "76b66684d45d065f3b844d2844c81bf0" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "74b4aff75aa8b52deebeb1e13ad59a67" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "2aba209ad880528cd6f4e7fa4b4530ed" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "04103804284580073d29a36873520a8b" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f71b53ad8aa0e45e1571127465c899b6" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "96183f3334045c81e7ddd5fa5babb459" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7e993afcfbe46bea9f5395c6e54dd0ab" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8e792e6c7e64b8302adb3dd65fc31997" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "627b9d2e53d49a76b02fb201614bdff6" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "0adb21d777bcf6d0460675c00bc151e7" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ea8b4b117e808c5c3c9ff38b71bb0250" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b55df005f5f200c5bf9cf6e91ee168f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "4d30991210103b5dd824e9696b0a4761" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b25de963e65fd39c3fa7b28ab224f32d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8f0e78c70b05356916e08bceb2f927a4" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 30801920, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 18219008 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21364736 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29753344 } ], "md5sum": "ee7447efe20ada46bf3bef3e300a5dd4" } ] }