{ "metadata": { "ParamSize": 603, "ParamBytes": 3851547248.0, "BitsPerParam": 4.501659816357815 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65667072, "records": [ { "name": "language_model.lm_head.q_weight", "shape": [ 32064, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65667072, "byteOffset": 0 } ], "md5sum": "cfcfba7ebfde728ba9817b1670a21cc7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30760960, "records": [ { "name": "language_model.lm_head.q_scale", "shape": [ 32064, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208384, "byteOffset": 0 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8208384 }, { "name": "language_model.model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8216576 } ], "md5sum": "f7b0cf279a0248164549a2336f41bc1a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "db5169acfdb15bc4a863f03157abc842" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31014912, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8462336 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8470528 } ], "md5sum": "2159c8b7b451d531983df737d8446524" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fe7e1e771f1e8d4b1d367e088ac1811d" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a6203e78616384a6f2f6d4e0954bd3fd" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1dd381777fa70b9673373c2e71e165ea" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a215c5a3ebe4ed29731a081a5e7ceef5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "749f95aed97c71eb327e0849c1aed634" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "47103e379a289def614afbbf95503b4b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "414c8afe8cb889b202ec007620399a7e" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5d8ba2fcaca09bd0f1eb55582fd57f48" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b0e4b067f03dc7ca2c6f44922cb8d9cc" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "aea98685f1bea5a3761dc9d04d50a318" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5de781124f0497232bc1f9cec2c1ca66" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0493b8e43308ac480c0194e213cfbf5f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "43fe6ecf3bf9e90d7591f088690a57d5" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "ec0d3ef0f36a84826a70d8625e62cd36" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "556fffd5b8ebaf2cefba9fb6318cad88" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d9b62b7a12e7a4290f5bcb7ad986bd32" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39d4ef2d1ba1108372335d7314203093" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "17208fdf588fecd4eac8821fc82b2a4b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "70c44b16933ef948451599a8b762f816" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "416e1ecc355decdc1bff117e3ad11b6d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "ced6da8198380e2f9cc2a8224009a408" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "daef2dfc0991f0ae599eacfdffc7f30e" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ae11aa6026cf219c965097c81bfd788d" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4379c3dfe91e0ed5a8556913f1907e6e" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ae7e8efdd25e3d0a646ecdbcd723b00b" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a3a5af8e74355f653f20b8126fc55190" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "a0fd5e5ca6459e9736a65bb6968bda8d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 65667072, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 32064, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65667072, "byteOffset": 0 } ], "md5sum": "666910a06d8587ae9706fb51fd5ff237" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b4b23c07f2ad0337862b3c40dad8af62" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "30e0d19ca14fbc9ac47985cfeed7c5b2" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3db98f07c3e244b05e3d126e7c5a43f9" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29270016, "records": [ { "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 32064, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208384, "byteOffset": 9445376 }, { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17653760 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 17661952 }, { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 20480000 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26116096 }, { "name": "language_model.model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 26124288 } ], "md5sum": "f9c8fd1f964d0625a838b7d28aeb7ca7" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "227b1c14f704f776ff703b28fc6a0250" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f9bf685119fe427362280044bc626c37" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c7519dd2c7142fb5c741bb7918d111eb" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5e72e12fdbbfe961cf9f077d895bfdba" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8a0e8d407f00267094acfb1a4e4a92de" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "137505fabf3800bf01fa0477da080c6e" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "fa6805d2ea67749da67f08289d064da4" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f0f4b17271931e95e979ab8234c47978" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b07d0214d877d3aa3b79c882f8d3536e" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e688e81a4c57ae2aa46439df41fb0ed9" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4573251b755740cb481512c87d4dca67" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 9437184 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "d2d6d01eb9f84a632c3e27c438ed1ed5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f9e160769324528655cbed9188360bfc" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "47555d51885a325ec519fea4e8116134" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "aef1f4d7ffd5911dbd70506d412878ff" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7b6baf8b3eaae04476c747766504bf2c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ef69b6439bb8be60391f7042e64ab7f3" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a3eb9a27d3675e6172b10e81f841fdb4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "language_model.model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "f537ffd61fbe6212cb12e7793f62be07" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "57f1ef1afce722875438923b1fe74ba2" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "497dd552df591ec594c94dfa4ec7a2bc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "2b55cf18c4ab5e49b19ca204599e8c95" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0bf6dfa62df70f6814464dabc3e02417" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "54ecddcc2c7ea7f6e5b0c7e94cd30189" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e99cd53d5b5d1320a7e780a8df0e75fb" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "language_model.model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "67f5eb7d4c97e2b8cb7fd7a7a0815f92" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8c2fb639173363d5ad5f79a00c9107f9" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "41aa57ea111851494d30824c6d552ded" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "20f05c42b6c3a77b853cb71ee0361c4e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fb4e59faa67da81fe7f0d27460afe955" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8c661fb04f450f2fa2188a488257c0ba" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5605fda25b65726d927aede27aab0836" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "language_model.model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "ea4fb51e6e31236d2f8ea43ef2594218" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "807bec18ea24cfa2a720d33ee3d246d1" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c09e400307a821e78280fe9b658990d1" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "584120be1b4997b1f24b8c987206e556" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33087088, "records": [ { "name": "language_model.model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "multi_modal_projector.linear_1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "multi_modal_projector.linear_1.q_weight", "shape": [ 4096, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12591104 }, { "name": "multi_modal_projector.linear_1.q_scale", "shape": [ 4096, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 196608, "byteOffset": 14163968 }, { "name": "multi_modal_projector.linear_2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14360576 }, { "name": "multi_modal_projector.linear_2.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14368768 }, { "name": "multi_modal_projector.linear_2.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22757376 }, { "name": "vision_tower.vision_model.embeddings.class_embedding", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23805952 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", "shape": [ 768, 3, 16, 16 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 23807488 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", "shape": [ 197, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75648, "byteOffset": 24987136 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", "shape": [ 197, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9456, "byteOffset": 25062784 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25072240 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25073776 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25075312 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25076848 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25078384 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 25084528 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26264176 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26411632 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 26413168 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 27592816 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27740272 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27741808 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 28036720 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28073584 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 28075120 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 28370032 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28406896 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 28408432 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 28703344 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28740208 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 28741744 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 29036656 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29073520 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29075056 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29076592 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29078128 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29079664 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 29085808 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 30265456 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30412912 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 30414448 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 31594096 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31741552 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31743088 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32038000 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32074864 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32076400 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32371312 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32408176 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32409712 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32704624 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32741488 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32743024 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 33037936 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33074800 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33076336 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33077872 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33079408 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33080944 } ], "md5sum": "ff682c0140b3aa47b8e617245657265f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33338880, "records": [ { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 1179648 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1327104 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 1328640 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 2508288 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2655744 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2657280 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2952192 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2989056 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2990592 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3285504 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3322368 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 3323904 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3618816 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3655680 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 3657216 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3952128 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3988992 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3990528 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3992064 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3993600 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3995136 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4001280 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 5180928 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5328384 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 5329920 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 6509568 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6657024 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6658560 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6953472 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6990336 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6991872 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7286784 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7323648 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 7325184 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7620096 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7656960 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 7658496 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7953408 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7990272 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7991808 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7993344 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7994880 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7996416 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 8002560 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 9182208 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9329664 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9331200 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10510848 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10658304 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10659840 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 10954752 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10991616 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10993152 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 11288064 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11324928 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 11326464 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 11621376 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11658240 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 11659776 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 11954688 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11991552 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11993088 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11994624 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11996160 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 11997696 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 12003840 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 13183488 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13330944 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 13332480 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 14512128 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14659584 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14661120 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 14956032 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14992896 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14994432 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 15289344 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15326208 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 15327744 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 15622656 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15659520 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 15661056 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 15955968 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15992832 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15994368 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15995904 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15997440 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15998976 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 16005120 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 17184768 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 17332224 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 17333760 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 18513408 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18660864 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18662400 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 18957312 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18994176 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18995712 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19290624 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19327488 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 19329024 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19623936 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19660800 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 19662336 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19957248 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19994112 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19995648 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19997184 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19998720 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 20000256 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 20006400 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 21186048 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 21333504 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 21335040 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 22514688 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22662144 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22663680 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 22958592 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22995456 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22996992 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23291904 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23328768 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23330304 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23625216 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23662080 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23663616 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23958528 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23995392 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23996928 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23998464 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24000000 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24001536 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24007680 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25187328 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25334784 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 25336320 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26515968 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26663424 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26664960 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 26959872 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26996736 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26998272 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27293184 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27330048 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27331584 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27626496 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27663360 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27664896 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27959808 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27996672 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27998208 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27999744 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28001280 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28002816 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28008960 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 29188608 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29336064 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 29337600 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 30517248 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30664704 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30666240 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 30961152 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30998016 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30999552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31294464 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31331328 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31332864 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31627776 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31664640 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31666176 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31961088 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31997952 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31999488 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32001024 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32002560 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32004096 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 32010240 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33189888 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33337344 } ], "md5sum": "d58d33362d057cac446685e88873fda9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e51e90b2bfe48703986e96f03d56b725" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32038400, "records": [ { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 1179648 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1327104 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1328640 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1623552 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1660416 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1661952 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1956864 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1993728 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1995264 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2290176 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2327040 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2328576 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2623488 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2660352 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2661888 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2663424 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2664960 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2666496 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 2672640 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 3852288 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3999744 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4001280 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 5180928 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5328384 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5329920 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5624832 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5661696 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5663232 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5958144 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5995008 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5996544 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6291456 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6328320 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6329856 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6624768 }, { "name": "vision_tower.vision_model.post_layernorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6661632 }, { "name": "vision_tower.vision_model.post_layernorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6663168 }, { "name": "vision_tower.vision_model.pre_layrnorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6664704 }, { "name": "vision_tower.vision_model.pre_layrnorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6666240 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6667776 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6675968 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29220352 } ], "md5sum": "590b107937876e6183ef75e6b2a783d6" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "e47f5787b847a4ebd1e0d4bdb8563127" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c536af2e073f757a793c8294d8518f4f" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0e53f470c5f8fafee2dacec069448d0e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 26697728, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 0 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 5644288 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14032896 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15081472 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 15089664 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 17907712 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23543808 }, { "name": "language_model.model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 23552000 } ], "md5sum": "b51d1d833edba5e863ace0b23c81c8c8" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "a4e29e3938a9f0c5ee28d82f30488591" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "52c034d85eb2c52a039d35e9cabeeeb1" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6d7254e78588681db21a169dd3545627" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2cb15126050dafe8fa3cbf43e64b476e" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "04e3708b03264d641d733a25829ab802" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a8efcce4906364b0b495415df6fbf537" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "9e685829e4184594c9977c3aa6e24e3d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "e1f060a505e3dcec6484516344fce5c6" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5ba4d830382cbbab7efb57b6c1020eb9" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b3904eb27c3bf09d14541fda971b30dd" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6f090d02ed7d9eac9c2642d2a81aa332" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "520cc6148e8441bf245ca470de67df40" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8a57b5f22ccaa59dfa1ebbc07cceb4ca" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "f30c6e92967bb0b3ae0d2f39502ed73b" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "836f8fc10897fee0c61666c752343a25" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b08e781c1946ec0e4dd7d7e9f9c9873d" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e3ae25ba293d6eb223215a88c21c9204" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "164a5b5d7b110cd22ba196238b4bec38" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4ba513b42aeaedf821d01d8ec7ee3f1a" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fdf8c7f5bffd67f2fe67e0dbef9d8b0f" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "3dd400f0d8a454f4e9a470fb89b373be" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "09e77e427a7889cf0b3876468960a52d" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ed200d448369a3a462b6cb61767f8fc5" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "72a4fcce4704e177c1d27f51207f665b" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ed08e858a8a20f6bac724f422ee22b6e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0bcbd55f1d8c19b29bf3d27a8ae563e3" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8c2b032a7ed39c283e1410d69a2024ee" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "06e3bcc372ecea0fbd6e5b960a74dac6" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "c9c0bb38e5506bcb61716cee89cea6a8" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "88a041da2cc4e41d52219c21282a44ca" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6f072bdf3e27411fc0767cd32925927f" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "language_model.model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5a06b00f4d609812816159abc4dfbabe" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "391f0250011b71e8515e9b1b05a5d4f8" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "129cc0abd3ada60a52542b1e222218c5" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "language_model.model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "language_model.model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "language_model.model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "dda141f84b3a8df28dfd9685c1cd51fe" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9b20217a220297543ca6b0ada817c4a5" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "language_model.model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 9437184 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20971520 } ], "md5sum": "ff2f0f73fc6d7296d4d039bb635b0037" } ] }