{ "metadata": { "ParamSize": 282, "ParamBytes": 5933193216.0, "BitsPerParam": 5.559536167513375 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1572864000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 256000, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864000, "byteOffset": 0 } ], "md5sum": "c60082db0e4d8f35e7289fd7de2d6953" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "0a3f375cf1e2c9f695595fbdb0460894" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b1b4b490c123e9fda95c5ad6a114814a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6144 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4724736 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14161920 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14168064 } ], "md5sum": "6c41cde98416da0cfce2f890191b72d4" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "b9b930f7da641c5a595835013ee6becd" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "df24aca81ca517f0c6c929ea97f9606d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "19f5d91c818853f75337644c5d0460e8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "914b8140b0803c1be930426d6e4f86eb" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "dda0ce191ffd36396f60d1dde7ecd6c1" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fa05c4b0573277e0e25e9431b8ada963" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "970988da55e20eb74b4d7eb951f24e7a" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "895796fa01c5ae32fe106ae0c8251a27" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "704305446ba2953b62e9ceebadd07063" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "028dd1dcf35dac597489b07af9dfdb2f" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a77612a121069421dd3899b31cdff6b4" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "2350bb04bee0a06eb34afd2647843d21" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4fdf97af5d9e34f3e189cd5d9fa5e88d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "040ea811b5b654731135e29293d63180" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "3111e33cfe433290fbff3775e89e11ae" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "15a6a1ec16ab97ad9601da73dcc298b7" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c941e6d371dbfe46ca546ad50ccadab2" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ef2cc584a481627c4dcbc7c625ad6dcc" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 } ], "md5sum": "ca0f9e187a584fc256c0230643afbbc6" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "5a645ad8a7ad21de07365029a99fe953" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0c37067c216179dca79b22da7274b3a1" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f303958ead6e2daaa3336967c361c85f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "b3ad0f343d0f749ffd6f8ec5b7c41213" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "36cb87785900c8b03ea04cadcd2c7b61" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "888fe098f5448c40531e0f73801a71cc" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "44056dd4b713c6d0c292d5876b50d1cb" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "8979fcbb6ffc9999a802dcb36db2c110" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c1b503e36756e5fa64fa6d6c536935c9" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cfa48f27791f04a6afc001c498096009" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "47cec6b9dd1fa4bd2b5f112b232f9b68" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "9911946bdd7cb423c25569b240fcaa84" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "21dfcc93791de79d358495202b300837" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "e99baebdb292d9f650b2ea5541b25c86" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "439729033342c3139e80c0b0414c1728" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5d9354c58b9395b7e0e9b044725dcc20" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4291b97ecc1fab9b6515a05a8dbd24bb" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0f83efe157e1c158f2c27a24b2828d69" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 } ], "md5sum": "d5f77226ac89a3c55700e7fd7812778a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a24e5f432c8b50da8d46aaf81ad468ec" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d7bbc1d181bf43907a1a092f3c50f7ab" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "fd461ae89dccedcef9638e2cb48984b7" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2a0d3cff4f52ceec8e8f6c40af9c0a50" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 28329984, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23605248 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23611392 } ], "md5sum": "4c1d3f579707ec96351b6caf43eb61f7" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 30676992, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9443328 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28317696 } ], "md5sum": "e7b9ca8c828248cdf4e043d02e77398b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "4c795c1fb95ddbc0651868a79f712180" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "61bf50a5ea090aa9cb45fedb191a8dd9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "61b7de5a963bfe8f5d912e6a11fa5104" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a83b0c205aac3c3191f9f8f1100581c5" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 30689280, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7077888 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 7084032 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11802624 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21239808 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21245952 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 23605248 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29896704 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30683136 } ], "md5sum": "641084e3ed190be0ad41361c1f891679" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "91068e71cafed5fd13a8b18b24a2abbd" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "c717ea3935ddf0deb10e7b0f579eedc5" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d33d060e3517f6d3c4031d3860ada555" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d1e366e579be3b2ea952452d40c29871" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "95b35efde9d72c464a57053ebfb8302e" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9c4ee949ac59a0d6e41cd9676b15bd8b" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 28329984, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23605248 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23611392 } ], "md5sum": "0ab3121d00fb54cf47b02726f2f6e265" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 30676992, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9443328 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28317696 } ], "md5sum": "1c5c872c45d3a9a18d4f13a7999745d8" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "8f4983e8d268430490a7bfd2089271b2" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b3562d605f03006f65eb486771e9e3f7" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "56bb44a8a76c8da8bed0a852d027b3a2" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "6cca4e4da80cc4104bcf42fe370c2e00" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 30689280, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7077888 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 7084032 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11802624 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21239808 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21245952 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 23605248 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29896704 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30683136 } ], "md5sum": "4944d992b1b0d8b041ea6c4762b11b09" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3d78ee2bc235a467be96bdce00bc4579" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "a756e530dee48ebb741ead864edbc566" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "e7ca1640cce62a24eb185527535cc2a4" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "21c78957b1967bc0e9d8a0c667226b53" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "dad0426f022ac9380153e56b771a205c" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "e6518b4dd0883453191e06ed464c5145" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "5a31c95c108b21de43eb104e4c32b9c5" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bfcb8919f7eb1a560c59de147e4644a9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "a17ed1d6946390eec3d604ae75cf0716" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "2b21178688c488bfcf39ba2e5e976744" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "36ae8787b767df38c9911ad4534b4ff6" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4dfbe51c1e27f3c2163064d8bf1fe47b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "9ad5a6b391692cd7ba2f211394f4841d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "142d51dc1cffe753399d2e152731067c" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e0de82327d5ecf92c4d6a7bbe8e903fe" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "73a8a7599dc02a49a1b61f70c01b6994" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "923e90249de0a8cc9c2bfd2fa160e42a" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "377b2fd5902e392774e9dc38cc649031" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e8e9c6fe7760ae3517a0e34e1ede66e4" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c33d5fe78f612f2c382f21b7ff2e0466" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 } ], "md5sum": "ce934c7e6c41e76269127505d5ddbc4e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "0151efeb7b93d74d86dd9291f0266ded" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "08663cb64732b524fb1515204569256e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "8587334165a82190371cc1dce4c975f8" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4264e7b9a235389f27b36fefa35d0d55" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 28329984, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23605248 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23611392 } ], "md5sum": "ff0d65b3d60947dcd0bb54b130e7054b" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 30676992, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9443328 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28317696 } ], "md5sum": "d379684e292777f3e0feeb5c8c0f477d" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "94bd6c4a7893492f003ec40434ef33d9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d51d6761865d3fec1b9ae7f6e6a12fdf" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8091897b86e142f4ad2788235ea7f01f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "c300ececd456b552ed7999a8b9da9b97" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 30689280, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7077888 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 7084032 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11802624 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21239808 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21245952 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 23605248 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29896704 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30683136 } ], "md5sum": "0329898e07322fa4c059b54dfe321db3" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3723ed2c9b615ba20a1469402e68ed5e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "652911ef304a85f54ad9655496c6b050" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 9443328, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 } ], "md5sum": "3ab9be0e91ef4f8193bdd1d5fa3189ed" } ] }