diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4311 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3631664128.0, + "BitsPerParam": 2.6739310072364444 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 211365888, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 211365888, + "byteOffset": 0 + } + ], + "md5sum": "48c08152fc14b2a618e78437162e4262" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "b7bd9b67ab59e38c00fba18ea2f3ba84" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "28c0b02fa1a69aff387f35ab19e926ce" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29369856, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 26420736, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26420736 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 26428928 + } + ], + "md5sum": "6c9827ff5c02bf722e115fe7e4069933" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "7d3228d96f64aa54af6787d3bcd01834" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "0be95009a2234474dd771519915dee70" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 5914624 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 12664832 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 13508608 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 23633920 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "08a07d3318553147f295efe7271e3ef8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 211365888, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 211365888, + "byteOffset": 0 + } + ], + "md5sum": "fc41b3530388be6f5b5f670666b46076" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26420736, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 26420736, + "byteOffset": 0 + } + ], + "md5sum": "7275a19f5d55f404043e9e4e380007e0" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "dee42fa2b52538657933945571b1bdfb" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "1b75bd9db390a195350925cf3b34838c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 27856896, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24907776 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24915968 + } + ], + "md5sum": "b5d028c259ffdd1f953e4b08d2480160" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "52f5705f444735ecd47c944d732092ee" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "fa50e96e2cbf8158ff1f5fd9282c089c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "f4ba04f1756a7120aeff3877a3ffef42" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "f33d719b68d257d66eb7d1f3673d778b" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "a496da0c6f5fc26850b9bb5666571d20" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "29dff366bda388c0158bd45de6e46069" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "c36d92e1674cea560c727a3151ed7c41" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "9e41f534d1d9d9963424c96d04ad42e7" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "12268871ad14631d5742a20823f74ff9" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "4a1a6a45fc08582b48ccbc3b10f436df" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "1b3794bd75f14e183a259f03e48eba17" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "b713d79fcc147131121c8d8f7a4d7b1c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "c3e99d3b9ea603dc15c2444543aa59c0" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "72ebe02270dac26ddb7a45a78b938da4" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "5808a28d4022aed03b66281b0cbb6c05" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "a151a2f2a6e4d4699fa1e721374f32c5" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "21a048ca4b94cf5b12ffec3b7b64b1b4" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "8c286af07f8e0aa287bf0238985a7de5" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "5bf42152b65361d5d93f1e56f9db313a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "fe4e889b4c33913399aea01c3b3003a8" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "da6a5540333fdfbf1a471cce775e69ba" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "f856a2b0f782f989c62cfaedfd1997b9" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 30806016, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 24899584 + } + ], + "md5sum": "48c64694bf7132f81f9216107a34f0b2" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "50725ac9c30ae01186106238c9c9d7d3" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "de5fc371ae42ebe9be500a497c669e93" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "d209c888cbf92d03e926ca0641513c55" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "e2455e669ca1260d668bfac1830bfe50" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "4a1e7cbcb6d2af3f014462387528d1ba" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "893ac3c6a252e74f3e729e056dc5c4f3" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "039f4547b7dca5e638fd2fbf0f6bfebc" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "e0a7b2f20b5fb20cc79719c94e18f19b" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "bf4763bf74dcb654da55be46a5fb951a" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "558b2d10897355831d728de7e691b689" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "1065a4e2f17271df7fb27d9ae794f1c5" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "13195f49786e7f97ae10c38e3c77e380" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "42f35e0d801e962fb672c7d9f632f3c4" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "cb132819c8fde910bc16ce0cf76d2f7f" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "113d486f310d4589ab1d85bf734c9973" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "6a04bb380a2f509ddfac54638160a754" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "9746c2326c61895202b66e9625b6bc4c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "4e50d47c68961d200b4838be9325ec91" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "45d458b17b6c2001b06ccf401e5f8385" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "1b08199798ec2bb5ef47672648ef4165" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "a65549689d8d73d620e7177b617fcf87" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "f4f0ef55b99f5a8075854c66b5b2aa75" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3f1548b107e2971199ff104e8ffcfd25" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "473c3d2507f67858d231c93c24806e4b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "e39873a8775fad6fbd015f8210b99cef" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "0175f965590883f64dc2935b129cd673" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "602aadef4f270446859594800411f67c" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30375936, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 18984960 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 29110272 + } + ], + "md5sum": "faa5bf5482cfd87cbc5fd8a0ed08b9d8" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 31129600, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 7602176 + } + ], + "md5sum": "9e9dcf19b65d12c8b7d396a9b5c1d2e8" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "41a221733cd9163cbc4bc2a4d1669252" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29425664, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2940928 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2949120 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 2957312 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 26484736 + } + ], + "md5sum": "80552fc04650aa533b025295968a27e8" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "9c58dee3bef2b89a143a1dfd66e5d763" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "115c3a1543da92494c79d0d2f9423a42" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "feb74b1dfb0c6aa81e3197ceb1fe0d79" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "ff2e6061fc577af9153e247b8e64f6f9" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 32391168, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5914624 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 5922816 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 29450240 + } + ], + "md5sum": "0763778bd4527dd49d909a27e0363b46" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "ffac8a861d98b264d63bb6720b886e72" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "e2494c74f2db2c2eaf20c359e8ef6fb8" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "da422d03a9be8564bcd4159a84f46077" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "afc1a70e86293c221777250bbc4a1ce9" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "be365d0b81c2ee695369ba100ea56db2" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "f70cfed9fbc0410fa6b0797235cdab7d" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "ede19a6adac155e641a20636a25c565c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "02c25ccb2ca599106bc5ecb1c8324a9c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "662bcf93de2404a25c60dbc309e38b31" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "1ede2476ce7173f35a1cb49a6cba4e04" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "f7980da4d6e8ce62759af827d8c69b0c" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "f304b900f29477eb0882c96e14899046" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "e843d7080faacd9c9e8f52fb87b0e7ef" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "2a5b07fcbafbb5d0e0b785472a6d2efd" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "d21b88653c36c0f66734079e4c60ab45" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "a197a4b56b9d1bb94b6927085b58ca98" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "48dae4b9ea3f4551e74c9aa45c125735" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "399899da1765cfacb382cfa5d8a9fc97" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "531448467d8018eb56a925eaa570e943" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "5e59bef0db8890aff878c5bd74751a4d" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "0159ab1d84d4ea4d742f4a8f3bd94382" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "3b682f5fadb8d5f1f1ecd87ff011670c" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "7901da496411db5e4f0b174d1dea0a79" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "fe16320cd078cebca0137a431c5b6f3d" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "8123f8a798bc7e2c71279865e3241b98" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "8dc8529234a734394f5de3828d8eeac7" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "9f20bb2ed26d17cbd963a7d599f4eb03" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24899584, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + } + ], + "md5sum": "0943559980034c1ec5bdfd0f9b0e25f4" + } + ] +} \ No newline at end of file