diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4383 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 4517413888.0, + "BitsPerParam": 4.500381276979491 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262672384, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128258, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262672384, + "byteOffset": 0 + } + ], + "md5sum": "8c05e54ad4d8a81225d50a43787bab44" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ca6fdf9c39d5061219d97d6485c93085" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32842240, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128258, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32834048, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32834048 + } + ], + "md5sum": "4c19552e6bf41fa27e9aa979c232d2e4" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 262672384, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128258, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262672384, + "byteOffset": 0 + } + ], + "md5sum": "7480ba3a887de48d654c27b343339259" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32834048, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128258, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32834048, + "byteOffset": 0 + } + ], + "md5sum": "306eb6bf7274bb667613b64f80a2a0c2" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33054720, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3694592 + } + ], + "md5sum": "115ba3853ca56c55c09ae92316745657" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "545abb5cddbf9daeb2efefc8abe83178" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "56b486ad7b374d63390e60933cdcdcbe" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e7183ac0eec1b7e343fa866729d5208e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "24973cedffa6fa2a64120ff6b13a0a49" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "aea332438cbb099e72f5b73dc0ddd6ee" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a388c697f028c3c593ede87d18989a2c" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "368516874f6a2fbb99d6b87f80a4deff" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "ebd25fcd108dba60de6b0972886d5438" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "19c7cc5dc64efb2907387c954a5a918e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9c6dc0ef78cbb0a6cdcedd59127964d0" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "eba032a9707e8e3fe00867690229b20b" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3fb531142c69efb57b882572ec9b36cb" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "4087e41f7eeb8e02fd3c6c70548ab87d" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0a36c909f10779ff6f8895b1aeffdd5a" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "d4e92cdb894b4deade71cc20f2cdd844" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "32a3ce4937ae1cfd356fb7c05e963c05" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "87b92c12c0d1b05169a43048f84ea1d8" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "702ba373463b9a8c2c1554b296874eb4" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cf2c0c84c0953d7e7244b6b6b75b8f3c" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e2be5f3b8f3af89739e3497c798c0abe" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "0b8b12fd294c751894f08e469b04d2aa" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fe07bdc5a29a9264e7f8b3a899a373d6" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b8f85d96a0a46dd0f40ab65a8b2cfbbc" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "cbb7c428dd186e70a5ccce91782afcff" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d9f7cbeafba7833d11dced63f24122e8" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "d8e7d3335c536bcf239fd6483b0e1ffa" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "94e68489b1fc7fad39c26bf8247214a4" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "26849f15dcd75e9dd67949ba91333847" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3d330335ce3d7d92e6999035bbcaf1cb" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "ca7566d397a4e3772e3f78129c582be3" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "1e6bd4488b7910f2d8f415f6acbcc792" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ed7552967405dedf934f0c242bee1fdc" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5f77c1602d2509ee641aa18ac117f494" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "51ee0ff5c759a643447da404d7bc18be" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "81a37c863060e8ad297f0c171188ee6a" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "aa070d0776e201f168b76077b5b35b6e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "5aad222d6ccad83b81940d563fc76638" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "797de767a09f4af4e1b1a5ef82fa6870" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "65c0e3c7669947fcad745352b4b0bf04" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8e83cf7f4a4580248454b8998911db33" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "cf5cfaa01d7c9ff5b1a681c4ba7d9ab8" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9b2ef50dafe1e56bdf59321f90654241" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a6c480b9920bb6efd68c743adfdd1b01" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "0c061106ebb580f45e40670858074fb2" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ee7c4071c8631bc3e6e8c8603eaa901a" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "98e5c8901d86fe3d78298f42f8f8eb07" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "66ed56d66997e068b331c641bd34d4be" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a9e80c4c17f3b41dfe914f7a1d9bbff2" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6faea2e08fc21174077cd783157c68c0" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "9728168ba975faa973078cd567a0f1c4" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b27d05e012aaad579488faadbbe6420b" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "184c41d240320d0e0dcc398b90a6d84d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "ea188730ec956b371761f3dc215aedd3" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "9e2f5ba561e381e3c71c432bb8ee6871" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "20b619762dff1309e809381bfac4ac1b" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6d4973eb6c559ffc1b36709ddb7bb506" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "e8eaaa53b3e379130f13ff5046bea71a" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "25b4f409c71aafe01fef38851603a73a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9183663c3c496031543917e1c058ea3b" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "2dac07339f4a5cc444e3b9c5111b4e21" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "659b2cb39768b55c735c9be2fd61fd77" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 30932992, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 23592960 + } + ], + "md5sum": "c9f949b98284c9f9ebda8fd2732436f5" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "519a618301a19e5bd8991c8fc7bbdc4e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fca3122fe58f7b797d88c4b82245a716" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "8a966a7cd920526c68d9d301ef4e7ced" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "369db3620992b095af2c85bd9157cd86" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "2b819297b6f535e6a342afffb6bcc63f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3686400 + } + ], + "md5sum": "649a96d6aad3cc8b2ae46310c0e027c1" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "446c7fd719984242a4f166006672ee2e" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "5610aeac9c366f4ad1f01a97d29d93f7" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b66cf3ec873a1b84559794dfe726154b" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8ed9a767a4e836ab4663851b986ba7d5" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "331838ca8378039d1adcbfbd7158d8c7" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7fa69f26a4877c4552eb87b8b53ebdf8" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "31d73e9abbdd22092bdbb8ccfc75f919" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "e5f94f619e903191d8d775af914d5420" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6d95d0bd25a29140d3ddd4a7ec3eed01" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f93b6e74454026038dbe8e9168c51f17" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "71f3b7806813fbb804255a6dd9a7196f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7e23a6ea5cdb345e0cdb158ad69b6482" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "ec174c872b8a2202fec7e007ac4a8dd6" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2e5afed1d29edf7eb05dbcf490d1b4c8" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "1b8856f64d831affaf085e232060c893" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f278daf2b7df0c3b9e54edc4484029e3" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "51cdc83ffa6baf9a74a39cddb5349d04" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "d217acc84bacf2177b4150c270817088" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "285cef9ddbd41b37ab894c37d4658aad" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d4b05220d4e6fe5cdc00b63f905ea9b9" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "3f356e4ec5f50dfc7acfbafd23ec250a" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "44e06802b543e5e449bde70ecef81426" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1048a752bf8e7212efeb533997174c3e" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "d5711d83b4916596c376c0babc6d0574" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "194ebed61f5c6bb622cc86a8398f5cdd" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "6d9194bbdd603a9dd0ac008161ce1975" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3dc8742475fbefb55ee6239dcaef336c" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "488a64810adb3db6cbb6ba3e4e662e70" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0e61cc1e05bdf8ec3b74b2fafaa78e3b" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "52a7b285849b7de9a59ee88bd146aaa7" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "d547311a66b60d2f56cb936730f99174" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c630b3803cd9a5cc4f0935784ffc9bbe" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 32505856, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 11010048 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18350080 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30932992 + } + ], + "md5sum": "a68f597bfdf7ed7a67489830d3d3839f" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 9437184, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + } + ], + "md5sum": "a2cbeac64bda3ef488bf2d50aef9358f" + } + ] +} \ No newline at end of file