{ "metadata": { "ParamSize": 325, "ParamBytes": 3790741504.0, "BitsPerParam": 4.500454373872803 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "8a73677d56fcbae5b449c5d44d65055b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30744576, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "raw", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8192000 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 8200192 } ], "md5sum": "f14f8fbc5e0e520af31c87ed16847b14" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e114a5ec13e265f9246d24ff010793e8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ab9e6cf8ded7b01af63496454cbf3797" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7fc5aa64f9937917d21981c0656d95a9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1f7f354b69f83d867117f93c75ca9264" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0497f3e3e3c8f54e263c9c9d74deacbb" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "ad38ecf6470b14c3eddfcf6b8026fa9a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "4cce513f54c40abf676c5ba80dfe35bf" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "74e0c73abca300f2d87b7db28e1fc4c9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0daa4809ad558590426527187ca502c3" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3b9dc0141455ca94860210a5c3c765c3" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c544bace4d057a8da2df6ef9de5d9732" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b0a020f252abdd640710a487bd604606" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "2fba4cb3aea173202992090c78b77da5" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "1c4e12c3032fad4c5b81aa66671ae701" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b7201a4a94c3f0590329cab49e3978fe" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6438df8045d49f0bf9b777bd4be6f1a1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4f9efc48974ff783548c6aa23463ba31" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3c919a553f9d2e88dcd15e5a3fcf9b27" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7f425b60efd15a21f63bf87fab3452e7" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "e0f11a5e4fe259450ac1c9f3cc845d6c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "61039348025279590bd8bdc3ea4bbca3" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "853653d56149a68bdd61a7c1b2ed120a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "97a08ff62e8117cb831615b0f2c99334" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fcbc3c435afc4c56a34a63797c62d081" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b8fa859a2a93a38e14a763465c0245c0" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8bc24c5908fdefbc64fbca0cfaeddc51" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "c8874194773359ab2c73d6b8ae27d7d9" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "66ad602d220b664a12421ccdb3ccb5d6" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "db570748f3458eff8205503782b2b1ca" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5b23a18e2f1ce4e8e4783dfe4e056615" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d2cd8ae66289879284ebf3335f5bb3c2" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29253632, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "raw", "nbytes": 8192000, "byteOffset": 9445376 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 17637376 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 17645568 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 20463616 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 26099712 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 26107904 } ], "md5sum": "235da66a83808a169e6dd0389ea628e0" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "387f5a48e8833545a9e6625174177147" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6b9c9c046c5def1a3554bf23fd449715" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b0fbfdc8b07b911c27f1c77ac9f6b0c7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fe743495899c53ec9d39d7bdde54c9b8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1f40c42e671b86c077853c473315a1a7" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dba942217d515737d3660f4bb42aa52e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "96c2d2d457e245086fa9eef9eadaa08c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "c74842e4663434d99c262c930e59fae7" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ec15fbc4ce31df5effad4b048749e190" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f150ccdc57f9e011c1f43a5811166279" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "178f72f719c24f633d506d62966df563" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4548c2c7a33661a30d88cb3c71f08ca9" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f6202141e5fef174d48c21950b49607b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "f7a658290f79a3f4c444ef0f7731ad60" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "ce60a1da15fa399406ce71250609be42" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a22608ec842dedfcdd1ce9ec980229b3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "97aeea95f7e75a2dc63af7a4fbfaa038" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c5c905105dcf35af1035ffcbc21cd02b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "170ecd72ddce28ccbc02cb87da9742c0" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1be45451863d797bbc5c026fecd9089e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "b2613b0b9c3cfc429c06185fd8a80bcb" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "e431f95e1faf8969acd6eb48165c0e8f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e6431be7c454f2d153235f46042ed3cb" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "018d5b6a87c6753407119a41441fb7db" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4b4a23a01bdea42dd56e2928a8649fa3" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5cef3b5abb12c2c0033ba506083b5cb0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e918bc270a74419195f80ef44cf538b9" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "646b8fc1e9c76c3421bb9ff983381887" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "28272a97757c2f6ce33c67f671792dd7" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0374db0f9d986c3865f0e1127bd7c4e7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b9e5895348f211118ec2aef0f5480ac4" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5fbc44603a2962b74c1fd9aa7a6483bd" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6758eaa67d50d176ccc7fd6e04066dca" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dc41a539f6006ee401283004ecf2a757" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "de21b6b74453f601369d39eab11a83f6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "402727f31bed4f7aa0fe12bc1e06cebe" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3b8844826937bc6b8b4d605f24803fc1" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b75c7a160d72673da5a907ff2420a804" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0f20b2d3a5aaf6289d520ce16bf9e429" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "84ba93be30d0b3da934777fa0a04563e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d2b856be8da472ea80accc7b80dccabe" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "112169ed477082383321512048e655c4" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "3947df68c3dc37b5591fa56f3480b4f2" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9d14761cebfc8b928830327a1a8d641b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "047781efb723631cea11872f8b0ac5b2" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "70cc93ad771010ba4db590fa2d316623" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8a0d2e3aeaccabf4e369cae36f9de1d0" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7ecc5b846c796bd8a9df5166e48da9b3" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "88bb1353b8146da768d06f353f7ddc22" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "cdf20e93f4ed2607b08aa7f10ce37db9" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a5d40ab85a9286393c1dd0256e97fcff" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7563103ed726dbe49aac60e7e7adf378" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "269d28a86479a0ddf7b11e333f98da8e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7136bc14c72dab36849ba7a77ca6bd5f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "888c0fe959f4008e9776b2e4f322c102" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "616984218cfd4187b02de2a35dc1ea9d" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "798edf7cfd53f938c607d8a0fee4dcdd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "46023b82d0acf71b699af98d634f186a" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "83b0fcdcf18901aaa79b2f757b7a9b65" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "52f62b26b649e98d13d39430e4c22c08" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ffef0af806eb5ad195dff005e4c7a0db" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ec602aa527e74344a25da011b0524c77" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "cbb74df2ec60f4455861bfef2a6bd2c1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "8da13b26890594049e53e891964ebaf6" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "30ea4f0ec0c48665c28916918848d70b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fa3792e8b94c87b01c76302aed717580" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "43909cfbdbf6b8e73345fe75cb27a6f3" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c9c041914bc226b3d3413cc55c1ea515" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "66c4f85f749681350067688e08025cac" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "016d2b77848a71aa44b5d8c60b56bc24" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "741e888ce1bc10cf4466fcba8b154c0e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "80e9b846b3cbde0a81d98c36a42e5893" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c0ecc4a20aea308f088fc5013e77551a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0415b228d5aaf08a49fe5f5e94df1be9" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8db1080b2975bda71434a1f350b20cec" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e8f45b4fc47b73331e4aeaa1f7312e26" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "40cb85109b1aca9ccec8f728f528b163" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "raw", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "effd6e644e946a2b06324cded63cf5ac" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3d189c21a66d98cfc21202d349a785dd" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b1148de679b33c93a3da0f54b847b6b9" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 21045248, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "float16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "float16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "float16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 } ], "md5sum": "e8a9b0e9cd8ff6c60f282547242cb49e" } ] }