diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,4383 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 5019811840.0, + "BitsPerParam": 5.000895173865207 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "5cabec76d48ed559842795022374b28a" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ae3b92978d1c0f976e931bd5c6e0af3e" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32841728, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 32833536, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32833536 + } + ], + "md5sum": "a277c14aa8f6a0003360b117ba40ab9f" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "11c62343e4f80a3526c30c358ee87b61" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32833536, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 32833536, + "byteOffset": 0 + } + ], + "md5sum": "b8664775502467d862c46a308fb62dcb" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33054720, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3694592 + } + ], + "md5sum": "172742f8892c96fb2addc4e97de78d23" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1496d1bb48998a15619d5e592a5e63a4" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "0f1ca94fdfa4a4c7105084a8b28b7cc8" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f17a4e5a34e837b21e201761ea773ab9" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "42a65211503f03035f06c0de5ffcb747" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "dcd52056c0312564cd0cd1fcf83ae090" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "28e3d57248dde075fcfc7684c41f502f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d2f4a74323128db7f88869dafbb92675" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "1a18e22916f3be025381e8177697353d" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "45fac8cfe3c64f9274ca412166ca468d" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "73186d4f5a1c07ed3104fe156da65212" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "1e90926f67149bc08cc15fd13230f8be" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0aacf8a645a4fccb24ec136e1c83af1e" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "2c7c6b5cdacd7de034c8ca5f9d552526" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1630beed5b679338de8f257f6327eca3" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "c1bfa75c1f03e2cf673f86d89c675140" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ab5e806e82022f7703b09b4c999ee8e2" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "13347c0c7d5fe1297781dd26b2896d84" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "bdd3293d8340c89860f01ab0fb012b03" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "040d68d1e6306613c207839043734591" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0894fc87b0c3a18778dc8b44c57353b4" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "3db9dfd5a235696ca9b6a8301a799641" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5b1befdea5b00f2d9aa48c6f7d5e6c7d" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3227406acf55b7fa0c909802376702e2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "44552a1f266f3d0fe69b6eabc01e61ec" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b3dfc5c93313d94c3f33862f869a630e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "f52cb631e2cdd06bd7921b0a05b93c6e" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b36f095ed0b58916e14cc1ae1161bbfb" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "ec5ca27cc20827c3f94f975abdd3e463" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "def11665e685c54a476fa17a0561e976" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "28723b6cc103a0c58a74fd2312fa4ed7" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "10903cc32478819f65886ebfb7c8f238" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "403c6ba3f819c6a44da2ee408a5b4087" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6b9d024d8d60045c9a122c84eecc883e" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "13bbf08115472802ce223f1fe6ac5733" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c9166ed2a1865e87dacd52234f97bc25" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "70c57ef7e3831226a2e22867c6eb749a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "da1e87d69988051728dd14cb568ef68a" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e7df8df029975d95955fc8599ae5af7c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "57f0240f81eea0e4bef42a3fe5fab280" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "42f6cc74c0a7a2ea0194bf5f0480c167" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "297b87c9470324264299d2ccd02e4b4a" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "777ebcc676655554d6a88dd29faba427" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cd0e03e7ca6e73741cc6d112ff0e9c42" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "34b10fe157866c5f8840af4b20654d12" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2202573529b0499fb7b3f37e485b776c" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9e0b7ce426eb5a91283874e0957b903f" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "cd32f4969010a1430ccf3fe635712bcf" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1baa3e3553e9a2f3bae10ab45d829c0f" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2068d0d2f13c4fbebe1b61c63ffddee5" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "7c10a963042f40e2dedeb7e44bc537bd" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "54bb6d72f05710a89e6f0c6c6f19b00d" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "11ce070daca2ee6278f982def097efb6" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2b46a6672a585737d0718efe171d2401" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "8d8644c735b613feb10b53b043b21639" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b0ddd1a2888b5e9bd532ae4293158d17" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c1f786eb9053be54d0626b4677ba1716" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "dd00e687a11ea64b3c715b8527ab4eb4" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "17dca5fafba6554cae5ea073c4c5a2fe" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6d4b2146306fac9f02e879582ec8a758" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "67fd222c033b409b435868d7a22a5aca" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "557efcc08f587baeeef469dde4fa4a32" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 30932992, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 23592960 + } + ], + "md5sum": "e46de5bd639de42f6063dfd091dba271" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a1420d61a734615add7b0bfb574945cf" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "de4cbbb7f6c93e72cdcbf4556e247afb" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "6282f9e3fa5d813a25966601bef673dd" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "123d115945f97e830bd1f07acdeda483" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "f93da5e154d55f2de683c2307479425f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3686400 + } + ], + "md5sum": "94c157429373fee8b19b2d0f5311b9ee" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "470d2e03a89f6774c8f1a1a6a38fad4a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "1b6c2a8dabf64a36a57743ee7466d131" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f476665299214e8f824ead7f8bbedba8" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "77ab3a84a36395d8d3381aba6f020ac4" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "c98cf6736216ce43a17700f7f7151265" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1f41b2e2ba0e867b86cb5970adea07f6" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1fa2a79bc4594e1b11d2ee28b2fb7240" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "9c9887ee6b3d0408f0ac8b0f01a96038" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a5782ffd228c7f8bc29f2af4af66e285" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1dfd7c9edb7af4dc884518c33da278a4" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "488b523df491ef1a279f282473b60393" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f99c2128053413a238bd3cf3d9fb6abd" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "2d0c706e9aa4abe24dc44f2ef9aa1e7e" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "eb9641fa85397a3ed14bf399c626212e" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "01ce6398ee838a1ecbefecb0cfe27f7e" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4e3ca53f117199e491c6f88a5ea84425" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "78fa188656fd1eca2f3b81bbdd143003" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "5e6e17006351b1c52193c11c9b471459" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b8dc90aee00ccafc952e6bb46c1034b7" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2dd56dca8b83833db0819f8a11b82874" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "ec864e552f900b9f3470005589e84b61" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e9e2a7a67b0bf872e123c00420fbc289" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b4c644906b9a47bf97a3cee6eacefa9b" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "cb9420ec7f8ad2788d962ae5b452ab68" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cfd32ef170a8f2618f4327ae0f768f90" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "cae363c821ccc95e4ee1447f46846da1" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e9aefc627336ff029d141b2947943e56" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "8e1239f35200d1a88f51edaecdea46ec" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8f7d57fc66b7d74ecd9ca58d7c61eea6" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5086ae07a56e8ab2ad45067f563e36b1" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "16e9a5b1fe60e0b0f5955eb91837dd03" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "30e43c9732a10252e22841ef58c932ae" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 32505856, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 11010048 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18350080 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 30932992 + } + ], + "md5sum": "ce680e53da5061f96c87322bd5fd7cd3" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 9437184, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + } + ], + "md5sum": "646edd13b453b4c67a19b096a77550ed" + } + ] +} \ No newline at end of file