{ "metadata": { "ParamSize": 269, "ParamBytes": 283132928.0, "BitsPerParam": 3.655862583030465 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 63205376, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 63205376, "byteOffset": 0 } ], "md5sum": "e7b49b4c6ba0344356fbdcdec46233be" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 63205376, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 63205376, "byteOffset": 0 } ], "md5sum": "e7b49b4c6ba0344356fbdcdec46233be" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33418240, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7900672, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7900672, "byteOffset": 7900672 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15801344 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 15803392 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 16966656 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 17112064 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 19454976 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19747840 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19749888 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 19756032 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 21033984 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 21193728 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 21619712 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21672960 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 21675008 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 22838272 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 22983680 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 25326592 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25619456 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25621504 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 25627648 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 26905600 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 27065344 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 27491328 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27544576 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 27546624 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 28709888 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 28855296 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 31198208 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 31491072 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31493120 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 31499264 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 32777216 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 32936960 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 33362944 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 33416192 } ], "md5sum": "e4b6c5f9d9a8eb8813e4d4f88846ba33" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33310720, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 1163264 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 1308672 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 3651584 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3944448 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3946496 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 3952640 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 5230592 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 5390336 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 5816320 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5869568 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 5871616 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 7034880 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 7180288 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 9523200 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9816064 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9818112 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 9824256 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 11102208 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11261952 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 11687936 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11741184 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 11743232 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 12906496 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 13051904 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 15394816 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15687680 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15689728 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 15695872 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 16973824 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 17133568 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 17559552 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17612800 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 17614848 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 18778112 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 18923520 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 21266432 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21559296 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21561344 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 21567488 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 22845440 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23005184 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 23431168 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23484416 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 23486464 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 24649728 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 24795136 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 27138048 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27430912 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 27432960 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 27439104 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 28717056 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 28876800 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 29302784 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29356032 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 29358080 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 30521344 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 30666752 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 33009664 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 33302528 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33304576 } ], "md5sum": "4fb729f6a7bc624aa0f7422ac38cfcef" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32585728, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 1277952 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 1437696 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 1863680 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1916928 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 1918976 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 3082240 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 3227648 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 5570560 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5863424 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 5865472 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 5871616 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 7149568 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 7309312 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 7735296 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7788544 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 7790592 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 8953856 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 9099264 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 11442176 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11735040 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 11737088 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 11743232 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 13021184 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 13180928 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 13606912 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13660160 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 13662208 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 14825472 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 14970880 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 17313792 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17606656 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17608704 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 17614848 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 18892800 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 19052544 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 19478528 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19531776 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 19533824 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 20697088 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 20842496 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 23185408 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23478272 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23480320 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 23486464 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 24764416 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 24924160 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 25350144 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25403392 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 25405440 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 26568704 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 26714112 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 29057024 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29349888 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29351936 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 29358080 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 30636032 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 30795776 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 31221760 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 31275008 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 31277056 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 32440320 } ], "md5sum": "8572c24763eba76e31a9e9087aedc3bd" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33439744, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 2342912 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2635776 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2637824 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 2643968 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 3921920 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 4081664 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 4507648 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 4560896 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 4562944 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 5726208 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 5871616 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 8214528 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8507392 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 8509440 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 8515584 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 9793536 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 9953280 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 10379264 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10432512 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 10434560 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 11597824 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 11743232 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 14086144 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14379008 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14381056 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 14387200 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 15665152 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 15824896 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 16250880 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16304128 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 16306176 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 17469440 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 17614848 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 19957760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 20250624 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 20252672 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 20258816 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 21536768 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 21696512 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 22122496 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 22175744 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 22177792 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 23341056 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 23486464 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 25829376 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26122240 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26124288 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 26130432 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 27408384 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 27568128 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 27994112 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28047360 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 28049408 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 29212672 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 29358080 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 31700992 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 31993856 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31995904 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 32002048 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 33280000 } ], "md5sum": "c3e18cf55248dd1d744e5c930cfd7468" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 23967744, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 425984 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 479232 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 481280 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 1644544 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 1789952 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 4132864 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 4425728 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 4427776 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 4433920 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 5711872 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 5871616 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 6297600 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6350848 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 6352896 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 7516160 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 7661568 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 10004480 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10297344 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 10299392 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 10305536 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 11583488 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11743232 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 12169216 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12222464 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 12224512 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 13387776 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 13533184 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 15876096 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16168960 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 16171008 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 16177152 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 17455104 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 17614848 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 18040832 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18094080 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1024, 284 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1163264, "byteOffset": 18096128 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1024, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 145408, "byteOffset": 19259392 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 5632, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2342912, "byteOffset": 19404800 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 5632, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 292864, "byteOffset": 21747712 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 22040576 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 22042624 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 3072, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1277952, "byteOffset": 22048768 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 3072, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 159744, "byteOffset": 23326720 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23486464 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1024, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 23912448 }, { "name": "model.norm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23965696 } ], "md5sum": "21d7d4b461aa432cc38af98cc018f736" } ] }