{ "metadata": { "ParamSize": 325, "ParamBytes": 3274746880.0, "BitsPerParam": 3.6176393565388256 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "f67da1946680ae7d8d77f42e6c8d0139" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "74f1896c1b0e21a31876805b868400a9" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33068544, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "raw", "nbytes": 6592000, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 6592000 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 6600192 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 30127616 } ], "md5sum": "3542d63bdecd0a1b71caa63283905e09" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "103c673a7a6d95a23eb84a23c3b1ef66" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32391168, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5914624 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 5922816 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 29450240 } ], "md5sum": "61a04ddb2b6ec41ce07ebf6ea01c0208" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "de5f5ae384c4aedd05b734795848b27b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "9d28b00604e6e9fe92f9ceb77bacdddd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "cc2785c1c6c5aa6eec1a11c2216f6e36" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d04e626adecb77d1d3480ad60964a4a3" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "f89f85d1eca368e6e45425a4441fd798" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "1b84df8f261e18cf5dde26b12e91509f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "fc1aa2a18621b8107cab00f0d91afc7b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "ccce56372cae8713fb9ce86353e7d628" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "61df165a88141f3f4d45be5313fc25a1" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7163e98494e9e380c4cbb4b1e03d299a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "9ab9e4af8e254c3f0fd33571d6c783e7" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "9fb1abf80670eda7ffb752c00e808edb" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "a31a16fb2733308188a5d1b4f4ace1dc" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "c19b76ccd4eb39990f551727c3701ac5" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "cbeb1da9bad09f03dc98c3c687c868d9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "4793720031308ef5f27b5334bea39185" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "3e679c025dbe3f30a5961da3b628fed9" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "8affa5728bbd35639c308275b486806b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "5b3eb629ff18d90497c2371cbcb3f4b2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "f525d2bfeaf887246ed859b8f755cd0d" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "9c737177a56c65a7d7d6c1a57e497fdd" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "8491c1e00fa00b77173137971c8c43e3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "cd0ecb95f8076125d9d14276c88499e8" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "db9a9aa32c677d3ab47c2f9290686b59" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "e2257967b263fdbc2a1e364e7956e560" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7adc243f5d794805749f676aae98db0c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31507968, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "raw", "nbytes": 6592000, "byteOffset": 24907776 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 31499776 } ], "md5sum": "20fa9c51bb59e2b9f8b218367eba7412" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "88ecdeab745f3f484e4619bf91b55264" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "b6bd9d102cfd2d1ae7457d21373122ad" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "06984ce2021fdd85254c1d7d48cbe9b9" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 30789632, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 2940928 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8847360 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 8855552 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 18980864 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 20246528 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 26996736 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 27848704 } ], "md5sum": "fdf8d12fe6f2f6df16e16bfa1cb02c1b" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "a0a1c7022f5b85a9c90330067c90254a" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "7b7eb795e4fb992b8d2bf83edc704333" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "2a59a74d84f59c8822d2311df52c3b32" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "11434c93bd4787c3f9e4ccd628fdb59a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "921126d0751a74e52b4b867066f1844f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "a02056bcb66a8cc75cff654a5312c760" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d76901fd21acb5c70ed6bcdfd3d28e8d" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "c6a91403a7048d305850e3e87cc65c99" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "298dd500fd2b0bb691b76c1396d251e5" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "1a0c1b8f67c5e867a741126b1b04f093" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "8567789683c7df934e586a14bd2905cb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "072808ff80510421c5c2b35a9fdf0ba5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "e327bf4ae6a3613a67402667e65a8218" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "98a79665fee4769259d56514284f0880" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "c467477f5726b0c9890b2079d03f0a00" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "6bcb3247e8a0c1f7c69e7360fabeafd8" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "54598baa3e7c8a2033aa676342054f14" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "23fc6b63efec5dc55c02243f81517034" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "56724dbb6a6b1c3447b8566bb53bad3b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "80f05533a74f61b5ddefde3442da93dc" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "ef12c9b8511e457eb59d9212508b9d0d" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "843861cc44374ea0cef76fcf54936c26" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "dc802e8e16c4153d3af51547b9a99e38" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "dc2f998cd5d938dbeae4e06f22bd5fea" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "20abd4f0a2c91a43c8dfc789a4575aa6" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "3fbba2721c0f325955a9ba1e479dfec8" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "0c5e40aa38ccd0a99d019cf01b8ae462" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "f74a39bfc981db835c35265b935b2b33" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "3510c0ef58fb75f18c6a379586514a42" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "d5bb6fd5d4d5fbdde5f0f57153adab5b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "2b9620406fffe92a066ee5931ae207c6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "7631dad4feb830e9d2c71e58fbceff8d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "fbb8d04874a6b5e3b579e42dc3b40c35" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "4ab9eeb7565292ea0b57ba57714b212b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "b5bfe8a444605704997f89033bc48f9a" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "c6182095d3c1b7faf62c3dd70540eaa7" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "2e22a212f4a7b36ac43b9da72c81a2a1" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "feefcfc4728317d8a6d7bd38fa677a96" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "031f13b52f551e1d2735483dc080be84" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 24899584, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 24055808 } ], "md5sum": "e61e2ce4b91e5b6a0d2abed771591739" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d38bbcfbc7c61f8425e1a40e83c115aa" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "e1c030aed188e60b72fcac97f2f565e1" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "9575724acee9829046a0ee24f222ae69" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d6cd1d3e702e02253a15a9aeec0f7952" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "eda488a7c322f26023a4c6db5b9c32ba" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "1fc6d3e063e33942f093891e6bc2a611" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "dc0b9fef102da0a4e6435e06b6ec9cab" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "0438ee57cc5cb65079603adab003ccc2" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "a70b01aae4a6a7a383fb375a125af504" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "785ba5fb5762a1846c2112672ad2c148" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "3a27766306011706ed19ac4e7286dc54" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "8fcaf78b5d385742ad8b5b5e792ac339" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "a67934b4af100b4d7d1fab21b6f07728" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "1c77bc9c92aa138251c42d72b2e6f460" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "e1777306fef3c65d4539fbb41b0f0397" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "da5e225b4eaf1cf66ac293fcdfb6c1a8" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "4a5f0de97134919abae7a8ade6230ac6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "33b4764148f7fe6f457344ce3287850d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "raw", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "db33ef0e7697502146c559ca7c1f0cc0" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "baae13eaaaa31b3ba1ba7c19b9b82dba" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "raw", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "raw", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "dd752d9b659bd7df85f744c9fe58aaeb" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 18984960, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "raw", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "raw", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "raw", "nbytes": 843776, "byteOffset": 18141184 } ], "md5sum": "dac17da2e5f29415fa44652f902d28ae" } ] }