{ "metadata": { "ParamSize": 965, "ParamBytes": 3075119104.0, "BitsPerParam": 4.52012387628928 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 512, 32000 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "2d09ff227ad5f5579aedb1df2c809ac7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 28516288, "records": [ { "name": "lm_head.q_scale", "shape": [ 128, 32000 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192000 }, { "name": "model.layers.30.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 8200192 }, { "name": "model.layers.30.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 13115392 }, { "name": "model.layers.30.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 13729792 }, { "name": "model.layers.30.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 26873344 } ], "md5sum": "e9ecf8649ea7a4c2398b5144ec8844b7" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 20362752, "records": [ { "name": "model.layers.30.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 13209600 }, { "name": "model.layers.30.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 14860800 }, { "name": "model.layers.30.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 19751424 } ], "md5sum": "4445a6b9cbf73c648e569debc14bc8d8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31812608, "records": [ { "name": "model.layers.30.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 0 }, { "name": "model.layers.30.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 13209600 }, { "name": "model.layers.30.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 14860800 }, { "name": "model.layers.30.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 19751424 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20362752 }, { "name": "model.layers.30.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 20370944 }, { "name": "model.layers.30.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23778816 }, { "name": "model.layers.30.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 24204800 }, { "name": "model.layers.30.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 27559424 }, { "name": "model.layers.30.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 27978752 }, { "name": "model.layers.30.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 31386624 } ], "md5sum": "ba37f225fd1ad35a93013e8053821d3e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 20693504, "records": [ { "name": "model.layers.30.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 3354624 }, { "name": "model.layers.30.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3773952 }, { "name": "model.layers.30.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7128576 }, { "name": "model.layers.30.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7547904 }, { "name": "model.layers.30.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 10955776 }, { "name": "model.layers.30.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11381760 }, { "name": "model.layers.30.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14736384 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15155712 }, { "name": "model.layers.31.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 15163904 }, { "name": "model.layers.31.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 20079104 } ], "md5sum": "f32dcf1d70da85f4aeb1f7d42efedec6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.31.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.31.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.31.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.31.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "1c1dd9a3edf58736b4f9ca80de88cdbf" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.31.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.31.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.31.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.31.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.31.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.31.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.31.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.31.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.31.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "40a0f35dd928942bf945b9c2974306a0" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "16494f75246325a9be7dcf30f5fea76f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31031808, "records": [ { "name": "model.layers.31.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.31.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.31.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.31.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.31.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.31.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.31.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.31.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.31.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.31.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.31.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 22831616 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31023616 } ], "md5sum": "12daa5381e138e9cfd471fc86a9cef90" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33525696, "records": [ { "name": "model.layers.0.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 4915200 }, { "name": "model.layers.0.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 5529600 }, { "name": "model.layers.0.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 18673152 }, { "name": "model.layers.0.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 20316096 } ], "md5sum": "49b3eb92dfb29d04992a3c19c97a86c8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31357952, "records": [ { "name": "model.layers.0.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 1651200 }, { "name": "model.layers.0.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 6541824 }, { "name": "model.layers.0.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 7153152 }, { "name": "model.layers.0.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 20362752 }, { "name": "model.layers.0.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 22013952 }, { "name": "model.layers.0.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 26904576 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27515904 }, { "name": "model.layers.0.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 27524096 }, { "name": "model.layers.0.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 30931968 } ], "md5sum": "b61185745c0c207dc6965b85e9c0760c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 32135168, "records": [ { "name": "model.layers.0.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 3354624 }, { "name": "model.layers.0.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 3773952 }, { "name": "model.layers.0.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 7181824 }, { "name": "model.layers.0.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 7607808 }, { "name": "model.layers.0.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 10962432 }, { "name": "model.layers.0.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 11381760 }, { "name": "model.layers.0.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 14789632 }, { "name": "model.layers.0.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 15215616 }, { "name": "model.layers.0.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 18570240 }, { "name": "model.layers.0.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 18989568 }, { "name": "model.layers.0.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 22397440 }, { "name": "model.layers.0.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 22823424 }, { "name": "model.layers.0.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 26178048 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26597376 }, { "name": "model.layers.1.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 26605568 }, { "name": "model.layers.1.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 31520768 } ], "md5sum": "8e8e203ade60a9775f08559a33b75c2e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.1.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.1.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.1.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "0d092e628f981fd39a153b4b2b2cbad3" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.1.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.1.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.1.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.1.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.1.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.1.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.1.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.1.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.1.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "1685b255b945980d046e47185e4e267c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.1.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.1.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.1.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.1.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.1.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.1.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.1.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.1.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.1.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.1.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.1.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.10.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.10.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "d20112977cc4c7a22981decfdb5624e5" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.10.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.10.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.10.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "0899370cc05384449589d9c91b60d6d0" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.10.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.10.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.10.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.10.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.10.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.10.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.10.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.10.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.10.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "f1e457ba56b03a30a4ddc55bb4ad1717" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.10.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.10.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.10.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.10.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.10.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.10.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.10.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.10.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.10.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.10.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.10.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.11.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.11.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "70038a64d14f07409dd7289b525d4840" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.11.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.11.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.11.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "81049aac52d28a56be97e5258b140452" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.11.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.11.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.11.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.11.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.11.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.11.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.11.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.11.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.11.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "7f4bec5b43b830b19b7dda9265da6edc" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.11.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.11.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.11.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.11.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.11.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.11.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.11.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.11.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.11.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.11.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.11.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.12.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.12.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "1066a7d8ea7c299c1e8441c1e95139d7" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.12.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.12.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.12.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "3eb2ff46424914ef00b61e49a25a8e80" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.12.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.12.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.12.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.12.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.12.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.12.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.12.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.12.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.12.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "a8b92836e5c7076172616b3c5710813f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.12.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.12.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.12.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.12.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.12.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.12.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.12.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.12.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.12.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.12.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.12.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.13.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.13.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "07a1bd54fcafa13d192d8b46630c71af" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.13.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.13.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.13.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "8c14ec568e8614f1b0bfc01a264cc51f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.13.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.13.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.13.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.13.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.13.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.13.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.13.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.13.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.13.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "347999e785f8bde7ca2bee632cf483ef" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 28353024, "records": [ { "name": "model.layers.13.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.13.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.13.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.13.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.13.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.13.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.13.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.13.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.13.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.13.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.13.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.14.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22823424 }, { "name": "model.layers.14.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27738624 } ], "md5sum": "9393c92a4b05bcfd235b8e17b07bf0dc" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31804416, "records": [ { "name": "model.layers.14.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 13209600 }, { "name": "model.layers.14.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 14860800 }, { "name": "model.layers.14.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 19751424 }, { "name": "model.layers.14.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 20362752 }, { "name": "model.layers.14.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23770624 }, { "name": "model.layers.14.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 24196608 }, { "name": "model.layers.14.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 27551232 }, { "name": "model.layers.14.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 27970560 }, { "name": "model.layers.14.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 31378432 } ], "md5sum": "bdc7e1c5c6bc6e378a0d8781154eed92" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 24527360, "records": [ { "name": "model.layers.14.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 3354624 }, { "name": "model.layers.14.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 3773952 }, { "name": "model.layers.14.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 7181824 }, { "name": "model.layers.14.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 7607808 }, { "name": "model.layers.14.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 10962432 }, { "name": "model.layers.14.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 11381760 }, { "name": "model.layers.14.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 14789632 }, { "name": "model.layers.14.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 15215616 }, { "name": "model.layers.14.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 18570240 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18989568 }, { "name": "model.layers.2.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 18997760 }, { "name": "model.layers.2.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 23912960 } ], "md5sum": "71cd12e98ed2fcdd6a6e8716808c6897" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.2.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.2.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.2.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "9e21b0776bbf46c410bad37ba30bccb3" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.2.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.2.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.2.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.2.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.2.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.2.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.2.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.2.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.2.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "65b86c55b99f25af636c35759e608ea3" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.2.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.2.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.2.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.2.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.2.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.2.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.2.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.2.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.2.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.2.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.2.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.3.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.3.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "bf93386dd2e54d53a0b178ea7b01dbc1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.3.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.3.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.3.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "7ab3ea1fcf6c2b6985605fdd7fade21a" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.3.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.3.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.3.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.3.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.3.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.3.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.3.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.3.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.3.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "f325b84d2da030a6357ce1a5cd3135e4" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.3.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.3.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.3.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.3.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.3.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.3.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.3.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.3.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.3.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.3.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.3.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.4.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.4.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "390cc511628f498dc73b8b9deccc42af" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.4.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.4.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.4.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "1be69c1f7c96370c8e15a1587f47738d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.4.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.4.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.4.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.4.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.4.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.4.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.4.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.4.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.4.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "797f263a6654e9c1e27fe93db223c2bf" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.4.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.4.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.4.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.4.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.4.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.4.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.4.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.4.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.4.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.4.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.4.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.5.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.5.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "bd458875c632a061413cd646ead84fd0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.5.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.5.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.5.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "ee8717dd4514f33a330ffc32ec9ab010" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.5.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.5.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.5.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.5.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.5.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.5.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.5.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.5.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.5.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "3ca8d836450bd32346b502c64d240836" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.5.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.5.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.5.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.5.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.5.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.5.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.5.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.5.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.5.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.5.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.5.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.6.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.6.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "41ac3646f310cd803cbf496b44ee1b98" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.6.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.6.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.6.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "d08b37aa118fbd6e62d27920b0c8efa6" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.6.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.6.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.6.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.6.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.6.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.6.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.6.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.6.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.6.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "ffd5f3e40b818c6653cbb3279d808167" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.6.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.6.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.6.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.6.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.6.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.6.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.6.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.6.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.6.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.6.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.6.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.7.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.7.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "95660ca5088bff34db1dbd2d7d83dd49" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.7.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.7.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.7.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "6042fe8e6bce060db74c738292895c0a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.7.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.7.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.7.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.7.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.7.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.7.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.7.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.7.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.7.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "eed0c34ec21d0e55244050f55e1d6825" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.7.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.7.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.7.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.7.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.7.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.7.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.7.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.7.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.7.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.7.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.7.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.8.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.8.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "8e15c22a4b525884028bb94380dcb415" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.8.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.8.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.8.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.8.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "523adca410c4d2f0d008dee8ef29d4a3" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.8.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.8.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.8.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.8.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.8.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.8.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.8.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.8.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.8.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "94674b8b0cb9f6d133f2ce1429472ee1" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.8.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.8.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.8.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.8.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.8.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.8.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.8.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.8.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.8.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.8.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.8.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.9.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.9.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "c924fd17898ac69620fcd33789b8c460" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.9.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.9.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.9.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "c4d6deaac622064aeaa1dd742ed5666b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.9.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.9.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.9.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.9.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.9.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.9.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.9.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.9.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.9.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "bf8627d5ef13e90bcbc948cdd8f33659" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22831616, "records": [ { "name": "model.layers.9.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.9.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.9.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.9.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.9.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.9.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.9.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.9.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.9.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.9.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.9.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 } ], "md5sum": "688b1b11440ff72ca9434fa739017578" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.14.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.14.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.14.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "2d32a4290b699da359505bd1496839aa" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25834432, "records": [ { "name": "model.layers.14.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.14.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5501952 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5510144 }, { "name": "model.layers.15.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 5518336 }, { "name": "model.layers.15.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 10433536 }, { "name": "model.layers.15.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 11047936 }, { "name": "model.layers.15.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 24191488 } ], "md5sum": "fda4aaf5e3d62ff97fa88f609112f7c2" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 20362752, "records": [ { "name": "model.layers.15.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 13209600 }, { "name": "model.layers.15.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 14860800 }, { "name": "model.layers.15.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 19751424 } ], "md5sum": "2eb91ce147e1ed8a56c77a5826edc3a2" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 31812608, "records": [ { "name": "model.layers.15.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 0 }, { "name": "model.layers.15.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 13209600 }, { "name": "model.layers.15.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 14860800 }, { "name": "model.layers.15.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 19751424 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20362752 }, { "name": "model.layers.15.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 20370944 }, { "name": "model.layers.15.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23778816 }, { "name": "model.layers.15.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 24204800 }, { "name": "model.layers.15.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 27559424 }, { "name": "model.layers.15.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 27978752 }, { "name": "model.layers.15.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 31386624 } ], "md5sum": "65eeabe570b7cae849d39c1d1564fca6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 24527360, "records": [ { "name": "model.layers.15.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 3354624 }, { "name": "model.layers.15.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 3773952 }, { "name": "model.layers.15.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 7181824 }, { "name": "model.layers.15.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 7607808 }, { "name": "model.layers.15.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 10962432 }, { "name": "model.layers.15.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 11381760 }, { "name": "model.layers.15.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 14789632 }, { "name": "model.layers.15.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 15215616 }, { "name": "model.layers.15.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 18570240 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18989568 }, { "name": "model.layers.16.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 18997760 }, { "name": "model.layers.16.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 23912960 } ], "md5sum": "b2872048cfb7806c5fab28ffcac07318" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.16.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.16.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.16.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "8fb5556ef6aa721ae50ba8ff65ab576c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.16.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.16.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.16.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.16.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.16.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.16.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.16.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.16.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.16.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "cf7451b6a6e4751a5453294fc0598591" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.16.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.16.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.16.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.16.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.16.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.16.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.16.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.16.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.16.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.16.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.16.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.17.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.17.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "10e5afc77c4018de595ddbf9a70511ce" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.17.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.17.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.17.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "573a6fab79fdc11f54541ce258f70683" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.17.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.17.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.17.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.17.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.17.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.17.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.17.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.17.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.17.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "22f0d64cdbf76682ecbc7c8031ee9d8b" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.17.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.17.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.17.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.17.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.17.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.17.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.17.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.17.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.17.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.17.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.17.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.18.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.18.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "293613e3a385a9001f71453af21af8d4" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.18.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.18.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.18.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "c46433ae33581cdc14490721f498277c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.18.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.18.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.18.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.18.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.18.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.18.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.18.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.18.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.18.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "a70da3e72f9524f3287c56e1c10d4d8d" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.18.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.18.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.18.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.18.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.18.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.18.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.18.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.18.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.18.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.18.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.18.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.19.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.19.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "ce7ce486f0427668fdf93a7d07edbff7" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.19.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.19.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.19.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "7981efdaeec356fca7dfddd94c948a60" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.19.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.19.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.19.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.19.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.19.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.19.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.19.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.19.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.19.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "cb33d6a8508150571ec5e14cff50578c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.19.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.19.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.19.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.19.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.19.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.19.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.19.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.19.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.19.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.19.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.19.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.20.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.20.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "53a1f97405420ce6d8c8b40777aa82d0" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.20.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.20.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.20.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.20.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "aa6f87871c402cfaaeee7513cb133426" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.20.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.20.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.20.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.20.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.20.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.20.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.20.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.20.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.20.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "b31c8b058ff7c59914afef0ed28a6830" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.20.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.20.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.20.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.20.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.20.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.20.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.20.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.20.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.20.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.20.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.20.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.21.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.21.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "791b4613b0f440ff20e2fa36ebe9d552" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.21.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.21.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.21.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "aa7884001c69a17446f78c872d2aa67f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.21.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.21.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.21.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.21.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.21.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.21.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.21.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.21.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.21.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "25d15907fd704bed06298fbd0d1370f5" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.21.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.21.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.21.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.21.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.21.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.21.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.21.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.21.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.21.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.21.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.21.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.22.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.22.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "761cace575fbb4be1d8991e3fa7e230a" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.22.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.22.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.22.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "d50d01a3ce677611610db57249fb825b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.22.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.22.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.22.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.22.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.22.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.22.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.22.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.22.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.22.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "96773f5f79393eb3353820cfa75f4017" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.22.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.22.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.22.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.22.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.22.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.22.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.22.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.22.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.22.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.22.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.22.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.23.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.23.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "d6c1d79ad0f7f7d7914353f8dff61271" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.23.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.23.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.23.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "d7cc24e762b0924ff58ddc2877d83419" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.23.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.23.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.23.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.23.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.23.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.23.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.23.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.23.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.23.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "6caf256a64ec007840f443934547a395" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.23.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.23.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.23.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.23.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.23.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.23.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.23.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.23.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.23.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.23.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.23.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.24.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.24.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "acbee8493bda9cc9f2084f7e9c07f242" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.24.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.24.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.24.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "73f15b7032620e49f8936ebc3f008d4a" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.24.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.24.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.24.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.24.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.24.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.24.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.24.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.24.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.24.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "b91fb9cd5249e20c99e270d9f8c3e52e" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.24.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.24.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.24.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.24.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.24.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.24.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.24.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.24.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.24.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.24.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.24.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.25.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.25.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "1ffc5bc9e66ca3520afcc65d3b22fc02" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.25.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.25.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.25.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "e6b8c7166f4e28a9a0ccb10643b3a3e2" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.25.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.25.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.25.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.25.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.25.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.25.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.25.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.25.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.25.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "8652e88f6fdc477798dc7c4411944437" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.25.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.25.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.25.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.25.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.25.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.25.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.25.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.25.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.25.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.25.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.25.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.26.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.26.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "173811a0ea69d2e0d7404e0a7c8a1b03" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.26.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.26.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.26.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "3ebf10d5f11fdb2310d56911fe4db39d" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.26.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.26.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.26.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.26.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.26.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.26.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.26.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.26.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.26.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "d957fd1965331a2969745b7bed77db64" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.26.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.26.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.26.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.26.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.26.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.26.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.26.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.26.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.26.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.26.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.26.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.27.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.27.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "37ab5c57049e2c53ef664981b6c6fff8" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.27.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.27.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.27.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "93cea88fb9e251c8c53f4d0b0e738d90" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.27.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.27.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.27.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.27.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.27.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.27.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.27.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.27.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.27.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "d21ddc726a375c06206551b8c610fd3d" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.27.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.27.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.27.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.27.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.27.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.27.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.27.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.27.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.27.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.27.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.27.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.28.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.28.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "89dec68a1f6f702118c8c960e7c3afb1" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.28.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.28.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.28.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.28.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "79363491a4d3cb15ecb94079e5fb8eae" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.28.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.28.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.28.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.28.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.28.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.28.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.28.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.28.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.28.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "66ea4f7b0786057c6e691f5c47f24cb5" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 28361216, "records": [ { "name": "model.layers.28.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.28.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.28.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.28.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.28.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.28.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.28.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.28.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.28.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.28.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.28.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22823424 }, { "name": "model.layers.29.mlp.down_u_proj.q_weight", "shape": [ 300, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4915200, "byteOffset": 22831616 }, { "name": "model.layers.29.mlp.down_u_proj.q_scale", "shape": [ 75, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 614400, "byteOffset": 27746816 } ], "md5sum": "8ea4888a93ff2decaeaa4682907359d2" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29647296, "records": [ { "name": "model.layers.29.mlp.down_v_proj.q_weight", "shape": [ 1376, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13143552, "byteOffset": 0 }, { "name": "model.layers.29.mlp.down_v_proj.q_scale", "shape": [ 344, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1642944, "byteOffset": 13143552 }, { "name": "model.layers.29.mlp.gate_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 14786496 }, { "name": "model.layers.29.mlp.gate_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 27996096 } ], "md5sum": "b7b7f0468cca27ef3c135fac00c20623" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 33480704, "records": [ { "name": "model.layers.29.mlp.gate_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 4890624 }, { "name": "model.layers.29.mlp.up_u_proj.q_weight", "shape": [ 300, 11008 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13209600, "byteOffset": 5501952 }, { "name": "model.layers.29.mlp.up_u_proj.q_scale", "shape": [ 75, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1651200, "byteOffset": 18711552 }, { "name": "model.layers.29.mlp.up_v_proj.q_weight", "shape": [ 512, 2388 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4890624, "byteOffset": 20362752 }, { "name": "model.layers.29.mlp.up_v_proj.q_scale", "shape": [ 128, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 611328, "byteOffset": 25253376 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25864704 }, { "name": "model.layers.29.self_attn.k_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 25872896 }, { "name": "model.layers.29.self_attn.k_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29280768 }, { "name": "model.layers.29.self_attn.k_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 29706752 }, { "name": "model.layers.29.self_attn.k_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 33061376 } ], "md5sum": "fb0ce9ed719c7a1a0ac4cce22ab8e4c1" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 26657280, "records": [ { "name": "model.layers.29.self_attn.o_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3407872 }, { "name": "model.layers.29.self_attn.o_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 3833856 }, { "name": "model.layers.29.self_attn.o_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 7188480 }, { "name": "model.layers.29.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 7607808 }, { "name": "model.layers.29.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11015680 }, { "name": "model.layers.29.self_attn.q_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 11441664 }, { "name": "model.layers.29.self_attn.q_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 14796288 }, { "name": "model.layers.29.self_attn.v_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 15215616 }, { "name": "model.layers.29.self_attn.v_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18623488 }, { "name": "model.layers.29.self_attn.v_v_proj.q_weight", "shape": [ 512, 1638 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3354624, "byteOffset": 19049472 }, { "name": "model.layers.29.self_attn.v_v_proj.q_scale", "shape": [ 128, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 419328, "byteOffset": 22404096 }, { "name": "model.layers.30.self_attn.q_u_proj.q_weight", "shape": [ 208, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3407872, "byteOffset": 22823424 }, { "name": "model.layers.30.self_attn.q_u_proj.q_scale", "shape": [ 52, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 26231296 } ], "md5sum": "1201aec368ae63306799e79579796724" } ] }