{ "metadata": { "ParamSize": 325, "ParamBytes": 4212444160.0, "BitsPerParam": 5.001066781753473 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65550336, "records": [ { "name": "lm_head.q_weight", "shape": [ 32007, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65550336, "byteOffset": 0 } ], "md5sum": "855ac22469f55fa0df943b16e8748bad" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30746368, "records": [ { "name": "lm_head.q_scale", "shape": [ 32007, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8193792, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8193792 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8201984 } ], "md5sum": "4fbe5c5cdfc198ded5ae287b7f4e864d" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "81dc66ef5ba619a89f1a413976bce9fe" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31014912, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8462336 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8470528 } ], "md5sum": "f75d525d375ed09de628dfe50348844a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5b679a7936294b441e3a90bf0057cf06" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "664fd2a8efb9444fed31db1afb629e13" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5039a1cb0298436e4290a7cdefa1c03b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "af57fa217bbfe0118d43c85d28162600" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bf9c38372ef3497412f947154d3f51ea" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "3977010e27ff3bbd95b9508b4721fbbe" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 65550336, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32007, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65550336, "byteOffset": 0 } ], "md5sum": "3ab3f688ee4c0a3db472f79e8a63930f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ea09c57437f21544ace0531ed7b44a21" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bfc5b798e413a5fb9a191c5ec8524b20" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3603738bfbe5be0d27c4917e2e9c4ac9" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29255424, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32007, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8193792, "byteOffset": 9445376 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 17639168 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 17647360 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 20465408 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 26101504 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 26109696 } ], "md5sum": "46abb18646b9ccbc96ee097f50c84e8b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6fd5b8a73169ac729828061522651c2b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f0e882e1db1444822aeb92facc1649b6" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "977d25005c629d477115edf7d97d41cd" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7fa8453ccbb8aba58078beecc1504bd3" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "15debb75859df27a95eec815f023523f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2d95836207bfcd9f773d758fcecf6158" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "069ebfdf09d39b27ccf5bc90ef268eea" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "4b033d1c25757c5797b1c1c217bef17e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a6b9ba970fb778d04a7a00c1e19d1f7d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ebe5af368423cf88274f36a80804e4ab" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d5f46103af8a1ca1a5a58abafb2ae87c" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "85153cecda7efbd12769ad6700d98fed" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d9129e4a0a6ac44d7020b54387502da7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "144ae042ff9c5f85bf8260e9b0ed8119" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "02408f20e64ee44829c99072a95fc9e5" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "584bf7b3b44f0ca920570280b8baaa28" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5954b41febaf4cc4ef969c61a167c5d0" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c4d9848e3b81655a81699e2483a546c3" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 9437184 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 20971520 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22020096 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 22028288 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 24846336 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30482432 } ], "md5sum": "0fbb11cbc446e84acfde616f5ed05fe5" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7bc482b01076e7c2bb5f7f26754d6ea7" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "76a317bcfe39d2046afc4e58a64b13a6" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bee9e759c6b7492335bb977f0cfbae0b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 27992064, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 15728640 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 24117248 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 25165824 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 25174016 } ], "md5sum": "83b9504075e850b293954a4af9382147" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "15c75dd7a0da6a2a2b360d941c6f75a6" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 31014912, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5644288 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 5652480 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 28196864 } ], "md5sum": "0e46dc02c00729233d508fb7844298e5" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30810112, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 5644288 } ], "md5sum": "2b51471d6b4146a193828690832f04f3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f2a8b386df79b83c15ddd9e464ee76d0" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "48877a56ce06f7a87fad15f1e0efa31d" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "063eaf830ab9aa315e663704ec63b951" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "cdf77ebd3dfe86d903245e5d25537cd7" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ca2774ade9f8475dc0a041901cae9d4e" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c29c814092c3dd4e77b4de9722e1b353" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "b9ad7aceb3b8a202dfa785fbfa15b5df" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "130b81a4e00b7f4f8670043f35329007" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "49f8933998123df4e476116ca73a81de" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a7ee33eb2277cbe5a42012c848b0ec08" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "7ddb6f5d4ad4efb0658aea80f8d8626b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "48a124b3dcf1e47ceabf5a8f5cf987db" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2bdc1ad8459dc750fbe871ddb3ef7e0c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32071680, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32063488 } ], "md5sum": "aedbdda7492e50890ddf0e4cf77f72dd" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fae3977aef492acb617a0b463d65e0ad" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "025ad19d38f34dfe754e2bc074a00c3f" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "85938b4197c5eb6654a26c78debf6fea" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a4e41df496af86f43908682e557a8983" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1c36110032b2b5486512e8451fa7a9a9" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "593ef533610cf305931263af31f6cb56" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "8c67ed785cd89698261f3cd391585181" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a355f621ca9a8fed5a67152929ee9326" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2892d2ad1581c5a6e3b9e4804e2047a0" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39bb91c975f18521f384a31d956caf91" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d4fc32fcc597bce654dd403022271fba" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4151f5f600fcfb650cc862f6446a1bc9" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "7a7706381558b915ccf9602b41482c39" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "d32dc8caa69d8c1f6c2d719da609180b" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bd471813d53c2724beffafa971592339" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fa473356b5f169f1896da785137ea16f" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c799858d264b98364d3a9e21d3b2f01a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 32579584, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21045248 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24190976 } ], "md5sum": "65216b322f8523a248a26e0672936537" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "abeb5f03251e80df8cd3d805135907a6" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "31272067dd20854f09d57d783dceb1d5" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32071680, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32063488 } ], "md5sum": "9155f0e0eaeef9e970506806a9a058d2" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8d3287e93c5d5f426e6c9785866e1624" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "929ba88cbcc8e04adc63aa5627014ca9" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6ca79d9b1bb9cdb963778de446b47860" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9f76f8b0949b8551d8bc84074ab19b25" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "67fe897a2f9bc309fee61c0a4b2c4fd8" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "aa3f73b5a0d805dd1bbeee34855f7b99" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "fa19d278c1fa5c3bddc7bf55fd0bfbd1" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f614e6d5414c53aecd3e84ed00023e29" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "20ae69f65c42c461b66a8ad52d0e085e" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4b3848ffb9a1e4d8098cab3391c2da76" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3651584daf5257dd22dff2b146983a26" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7c60e3003fd0c125dd723d3c63b8d82f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "6009bad99305a62f0bcfc78bfc8a3f09" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6d3e79d20c11a18240a5c7be244f938d" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2d1b1ab105bc70ebf7f7b9aed9b59ba7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6d3730120555781e3f91de13d226df23" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "284c0f2b790601462ca11e618e586dec" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32579584, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21045248 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24190976 } ], "md5sum": "8f0c24bd1e82031aa6d447edf458aeff" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cb9fb0f2a78ea6d8a056da0649faded8" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4ecc360af81bff0a71e0e1f5b7f690f3" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32071680, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32063488 } ], "md5sum": "4b72d97d4e8109f9ba36361e60f1116f" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "37bc7b9c47e24b5f29cd594f9aed9409" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "60a9c7356b8b98d3d2ecfedeec480d88" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f9f8e2c13c86b4c515c05cf2185043ba" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "630d3557a6e5f0e877e1437bb5e67612" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "385b28a90ec99869a901d4e51242e6d2" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "3256952ac212f3fcb75b463c527d67a1" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "db6a0113d5c1a2cc02edca5b5185fc54" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5e03802977d168c40014dc5de8f150e8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8b2eeefd9593ed48150aa520b62b568d" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "02fa369b9dda6b8a33fd72c8c2d3ec92" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6af21bca171d8e7bfb7f99c8dbfd1184" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b38674c74f3f7423f3fd5dd72a9b0fe0" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "8a50401d721ebc7ab295a77c3efd96d8" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "8c79e043303cfe13030860b40527180a" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6c66931bd56885aa8c2da3bba038475e" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0e22dc5bed21ca92d6d87b0eb5b0112e" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7518549b5d32da170524860060ace2c3" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 32579584, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21045248 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24190976 } ], "md5sum": "97ef50f331b641a51d50d0b1dfdfe52e" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 1048576, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 } ], "md5sum": "ff94b04fa3519aa0336b6eff48450a82" } ] }