{ "metadata": { "ParamSize": 325, "ParamBytes": 4212490240.0, "BitsPerParam": 5.001066770081567 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65568768, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65568768, "byteOffset": 0 } ], "md5sum": "f85e52248bbea32b22b0980b36626192" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30748672, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8196096, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8196096 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8204288 } ], "md5sum": "43bda03c7468d9d400fd49d0574745a4" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "dbb6f85e88c3a6cca9695f6035724db5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28196864, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 2818048 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 2826240 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 2834432 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 25378816 } ], "md5sum": "06be15bccf1c9bda0bae3f639cf41196" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30810112, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 5644288 } ], "md5sum": "9171f0f1e5a5c051fd0382c2912d9c05" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a951dbc3cdac7df6d7d59d3e90d73fc0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "65d57daffea784b10f35b538a9a83267" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bfecb540df73f5d0f45e8dc6e75cc8ff" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "8fac56d965afd42d0b4dd0bbc0d215ba" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6e60247cb4dbe69a242d33a387bd26f3" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a70f87c9e61c21167af0e920624ac157" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "470e521324fe33497ea9994a3af244fe" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5d86dadf6d55316cff7cdeac7674cf09" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fcca77b3c7ea7486e5feb30e632bb6ae" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1dc2ea571ff8a58063635cdacabc4c5d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "a39c93f23f60a2a9045ffb54f9cf5e03" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "463474766c3e71481251c276a1e4c0d4" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8b13b66604e8eb8a8524a663a12ae435" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "564d9d4e741fae401c7518fe8ab09fb7" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2b548f620bd7cfaa32f321fb1bc38d70" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cee5f537da07377b0a06aee839a219a1" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a91cd8dbfb5caf9cce070e664c08e0c8" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "a70f57084bcecd215209da80b010ae19" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7c94ea992d73bcb766d04679b1b66f8c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "73331862db4bef89665eaf0cc82a345d" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "58a6cad2d963f4f2351265d04ce96fa9" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "69e6c0cec60c85b386334b530b0ceb87" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "23ac325136ccd715cb9d8a273c5ec2c2" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6a654b4f453f6900fbada0ec644739cd" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "c93b1a53c04c05c2dd60b61a390a0eb7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 65568768, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65568768, "byteOffset": 0 } ], "md5sum": "ae9806958f35a4fddc18017adcb54c23" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31805440, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8196096, "byteOffset": 1056768 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9252864 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9261056 } ], "md5sum": "1805e0729b56e4b7d602fd6ee819724d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b7aecfdd4af0b1bdd32122530f108227" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ff5934980f24c278a9f64268dc0f851e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "52cf3cc62d6b2b51fd41efe74fc11928" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e1c46d7580e6197b56cf26cbc7b426bb" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ec571c66edc49c0a193768c623a00b62" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "689806017f010fe8a45977f7a711c4a3" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6f2ca4c26975ff1335884c652ce5ff67" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b9a4ad9aea2161f3a18e22a84538151d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9ef4c75f079479eea4c360652a67b63e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "37fbb0c658ca57f6dacc079b05b2df3b" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b2fd3f71dd8f21cb26e81145701ece2f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29827072, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 21045248 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 26681344 } ], "md5sum": "cefed4e96f83e8d2e5981d8fa3a7484a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "c3a7e3674910ef8718173da1ca833217" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "89085373bbe49a1b167d60d42ea10206" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2b626b94329f58227f2d016e662cf696" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "9c8cfc693c781b0a0b8fecc43de4146c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2f9a3ccc1551d96e918c45e51fb2b192" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7eb087afd1ac88cf17bf7c527e57212b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "d4945041c2b28bd91b3fdc8a16ef8d26" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "4300105f71276edaf6172a4fbc36cc74" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "54910d3f3be6c18adc08d7c7ee01e8c9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7de4f830ac60079e10c9ebc604406913" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b9d6486d7b29d6636bd71740882623a1" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "07e6546cb5f774442b568e03be1d03dd" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3c6f8f7f5a19d770f8995073d5f9f308" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "12098c2e1498a37dc4be51911e8c63cc" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "c0466976c18f9f97b0d9986fde7635f2" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "160bc1979c8c8fa45d9c618d6efc1ab0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c4c3f380879cf167b4d7e78b5f4d2f35" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f0b6269991d7f37049951e55b6ed2b91" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9b5e941c820017927e1f221dcbe41004" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "058b675143f655dc2647bc822f82c5d9" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "5f62d3a85675bf698e97b790b215f417" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "ba11089c71a3ebe721c9c0151a31851b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2380b55d2043529eee74f0c9fe95cf59" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2c5039ac9a48f41f2b437d51850a3c85" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7ae37dde2ac65c5b110f1822b9df2961" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e193235eba60e70ad7e968ed2e969ed8" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "068b912fa6d7871cad9061b0824788da" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "05e3a36e843241896d1dee85646d3027" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6ea4228b1dc3543ed59df250c9610ed4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bcdda4511cb8ed0a99c973c02900ee6a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 28196864, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 2818048 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 2826240 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 2834432 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 25378816 } ], "md5sum": "eda79c0093b611513c9188407d1db223" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 30810112, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 5644288 } ], "md5sum": "505a1eb72e6c55653a771df059c9f5e9" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "8dccfe1381d54d755832d69867b678ac" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fab94610f2a5f1ec915c09cba90e9f2b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "de8ee1ba3bc2491501dcfb8c17084fa7" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "d0c75d6ba9d0f9bd28266f6102b168da" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7619401648db4fc407bd8e232b4a2a21" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d3849b67441e370178272533cc5c4513" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "0a74acd70a9357fb629236ab787cf056" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "03ed8ea475eece58d7770c306f5fd530" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "559f9f6e313a72fd7d63d0391e54f510" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "90243f46167214da48b90ca60896ff19" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "4d91ee3a87fabcd7b24feabee9518029" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0dbbfde2420851cc4bed86d67ed31773" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "99db738659f01ec30d9605b6b6d5b128" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "2df9139b2a11edbf6cb0eebf0ad52108" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "628efe4f52b4bde4318f35053d889cce" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "42f78bd94074e3f6b3b8ffc043eb09d9" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b3fd0193e07eb1add788f9082e148f35" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "b4e04e902312643cb7862ff50ad6b66c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7eacd4e7338671417596fcb59202d272" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0e97915b1470363b79fd210d43fc8ba1" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "5ce369f17224dca75d9cb0e4a5990bf8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0271a2d2ba716727b8b87a8a0c98f085" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "685f3cf5a494bddc7d34562c6947603c" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d0392528e31a00d0854e7b3754184476" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "8e66810352103772c88eb05039c08897" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b1dc968194e74582e1c49cbfd125927f" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "814bf460a614341b3d859a3b93497f9a" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "e28f0db679a13d6eae3d6d1acec5d924" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a518f8e4244fe0d8ccc0c8740d4f61d9" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4c0ad39c7b643e1ed7f9282531478469" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a20c350842aa062f87f767b54327bfe3" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "9a22d89967b23c1351168a0276b748bc" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "734f62e45f6b285dbe2a61faa9d688c1" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c9056eed6284eb476df94655b83ae459" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "cdf1957b3485bd200c5f3c2fcbf65e63" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c7e7bca10d71242ecf82aa4cbfebf45f" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1734f11cb82f44c482f1507d9ef98770" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 30801920, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 18219008 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21364736 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29753344 } ], "md5sum": "6ee8c0dfe33b437ecf48f88d1b087d0b" } ] }