{ "metadata": { "ParamSize": 325, "ParamBytes": 1970540544.0, "BitsPerParam": 4.065377299522369 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "compressed-shard", "nbytes": 49250304, "records": [ { "name": "lm_head.q_weight", "shape": [ 32064, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49250304, "byteOffset": 0 } ], "md5sum": "eb6c6a847fbfd1162066cadde20450ea" }, { "dataPath": "params_shard_1.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5a6c20a7a3a52aa726c286a3b6893aaf" }, { "dataPath": "params_shard_2.bin", "format": "compressed-shard", "nbytes": 33168384, "records": [ { "name": "lm_head.q_scale", "shape": [ 32064, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1539072, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 1539072 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 1545216 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14128128 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14521344 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14527488 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14533632 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27116544 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27509760 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28296192 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28302336 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33020928 } ], "md5sum": "04f8bf09d2af9918b7df89b4edd21197" }, { "dataPath": "params_shard_3.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d0a8e4c0b9c12960a589676812dc848a" }, { "dataPath": "params_shard_4.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "3267f29e7482663ef0619967060a06b5" }, { "dataPath": "params_shard_5.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "324ee43533eb5b47c7c79b9defc623c0" }, { "dataPath": "params_shard_6.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "6e47b938d5f649b8b79e7fbf88e486d8" }, { "dataPath": "params_shard_7.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0ef6d1599484d65121b786cc53521e8e" }, { "dataPath": "params_shard_8.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "289061b98404c9abec8f91c9251d3072" }, { "dataPath": "params_shard_9.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "278b3f3a19b2d42e9b9f41ffd29d82e5" }, { "dataPath": "params_shard_10.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "7c5a2401e6b50b14c34530a3770ceb7a" }, { "dataPath": "params_shard_11.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "26fa9208e19337b3f57fb900f021fe19" }, { "dataPath": "params_shard_12.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "e3b3a486e7e322ac7da2486342ce3a48" }, { "dataPath": "params_shard_13.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e6474757b726f342cf4ecd4cc3012ff6" }, { "dataPath": "params_shard_14.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "f3738d8a47e8264cf84db7605cecae6b" }, { "dataPath": "params_shard_15.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3c5d38dff4cafe22256be044e955b63a" }, { "dataPath": "params_shard_16.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "37af502bc0ffcd65dbd2c7a0f406fbd9" }, { "dataPath": "params_shard_17.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e5dfd707c17d916c784066518e69ad02" }, { "dataPath": "params_shard_18.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "d17aec96f008a12993552c8d7d71969b" }, { "dataPath": "params_shard_19.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ba2e3ea6a8059582b9f0934342978a58" }, { "dataPath": "params_shard_20.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "699613dda6f0fe135194b4578318776d" }, { "dataPath": "params_shard_21.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "658fc2482db51babf8b021661117444a" }, { "dataPath": "params_shard_22.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "b387fb00bae80f7dbb8a7da706b35f0c" }, { "dataPath": "params_shard_23.bin", "format": "compressed-shard", "nbytes": 49250304, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32064, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49250304, "byteOffset": 0 } ], "md5sum": "f02ac04b7253cb42f68bf29676b2ad61" }, { "dataPath": "params_shard_24.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ecd23c123f7b1c0a2ea15716c9fe645a" }, { "dataPath": "params_shard_25.bin", "format": "compressed-shard", "nbytes": 29918208, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32064, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1539072, "byteOffset": 14604288 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 16143360 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16149504 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28732416 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29125632 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29912064 } ], "md5sum": "5cfa6cf8db84c96c8451a877ed6b132f" }, { "dataPath": "params_shard_26.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1327ccf0b27d40726a2a4b05ea217d3d" }, { "dataPath": "params_shard_27.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "80a2041d77a3a10fbe1ae6a417c41cec" }, { "dataPath": "params_shard_28.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c4543861c7594597845249e6024ce611" }, { "dataPath": "params_shard_29.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "b3fd82de236c17f7c1461f2316bb50cf" }, { "dataPath": "params_shard_30.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b7b4f7a768891add157e66dd2d80f10d" }, { "dataPath": "params_shard_31.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "36a9942dd3d5cb4d0da338e95415a337" }, { "dataPath": "params_shard_32.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "546bacf3a84749c40fbd6fdd196ee82d" }, { "dataPath": "params_shard_33.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "97ee9ffe41d5dc61fe03a22550c25e14" }, { "dataPath": "params_shard_34.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "76443f5ab2569d636a70c99b8d230557" }, { "dataPath": "params_shard_35.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "5570358fa56ffee02f8a38fce5605bef" }, { "dataPath": "params_shard_36.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3498da9e7aa57567fef5d6c5511a99fa" }, { "dataPath": "params_shard_37.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "73ee37dbf2be510ee2671d2f078d4001" }, { "dataPath": "params_shard_38.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "60980736534d9a273d12f6f54e5d9852" }, { "dataPath": "params_shard_39.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "6824f6a6f2dc85083a173c69d94d3a45" }, { "dataPath": "params_shard_40.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "50d4bbb0e029eeeb4631629f6f82ef77" }, { "dataPath": "params_shard_41.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "9af2a2c272f4847d7042034ca87fc2f3" }, { "dataPath": "params_shard_42.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cfaa75ebbe74e4d44ed5860dca67ab85" }, { "dataPath": "params_shard_43.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "55664ac3a81a159147293599e2143f0c" }, { "dataPath": "params_shard_44.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "795fe155723c6e6b303a38276139d5a4" }, { "dataPath": "params_shard_45.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "697979e9f22919e556f1c1767b777090" }, { "dataPath": "params_shard_46.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "92c41702d1a27cfd5413b709130b0f65" }, { "dataPath": "params_shard_47.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "ffce584e4e06ff7995aadd464ec9517c" }, { "dataPath": "params_shard_48.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ebd717793f5ff9f9fdf7fbdc20c8bde1" }, { "dataPath": "params_shard_49.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19464192 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19470336 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32053248 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32446464 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "46342309ceda3dcbada8249b01a960cb" }, { "dataPath": "params_shard_50.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a1200557e9f66f5933f6160c4f04d14f" }, { "dataPath": "params_shard_51.bin", "format": "compressed-shard", "nbytes": 25116672, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 4718592 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 4866048 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19021824 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 19464192 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 20250624 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 24969216 } ], "md5sum": "c35d7c187ba746eb6d417b228dd58791" }, { "dataPath": "params_shard_52.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "51bf4fe0a7a5364129dbc090c1aefc6c" }, { "dataPath": "params_shard_53.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "37f714b4329b775a252ddd90c915c050" }, { "dataPath": "params_shard_54.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6970fa2d6083948b5260c93efde0f0ab" }, { "dataPath": "params_shard_55.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "1e5561efb495ca6b5a3c008a9fe76448" }, { "dataPath": "params_shard_56.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "12686e6735769e9d38a6475da606b34f" }, { "dataPath": "params_shard_57.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "b3dcbc2bc1fce6afd07a096c19054ab4" }, { "dataPath": "params_shard_58.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b8445fd0dd0a24aa10e2668c91a9209d" }, { "dataPath": "params_shard_59.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "fdf4c04b457cbe027b645d07a1cb61ac" }, { "dataPath": "params_shard_60.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5266e32e8571ce1b29a9490e44000399" }, { "dataPath": "params_shard_61.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "a7a2fdc1efa6bdc3e8309c9f44ee9540" }, { "dataPath": "params_shard_62.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5812dc0fe676983b77c6b6bb61fa80f2" }, { "dataPath": "params_shard_63.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "d4c2509aba9f77c24c8cb923397a515a" }, { "dataPath": "params_shard_64.bin", "format": "compressed-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "567ca9d87204922b24bc0398ba218365" }, { "dataPath": "params_shard_65.bin", "format": "compressed-shard", "nbytes": 33239040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14598144 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14604288 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 27187200 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27580416 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28366848 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28372992 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33091584 } ], "md5sum": "e0fd8ae41c165f3427fbe2af1ede03cd" }, { "dataPath": "params_shard_66.bin", "format": "compressed-shard", "nbytes": 14598144, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 9216, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 9216, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14155776 } ], "md5sum": "107303b26fb0af34df9916e4a2301fe1" } ] }