{ "metadata": { "ParamSize": 555, "ParamBytes": 17019832320.0, "BitsPerParam": 4.59034047349219 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 589824000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824000, "byteOffset": 0 } ], "md5sum": "33c0315e1fec4efbe2820fd1652b3907" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 73728000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 73728000, "byteOffset": 0 } ], "md5sum": "63ec10e87949fa56acd5798401501776" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "0c1cd6bfe199f79866d2d724743715b2" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "dbc571257f7b5d76c501bbb64e80e44c" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a9244717eaecf8ca6572d9f064c14f30" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31887360, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 9216 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10626048 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31859712 }, { "name": "model.layers.0.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31868928 }, { "name": "model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31878144 } ], "md5sum": "b3620e1fe86056b27723263a9e1f1f4b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "83a7a0e3cfc5882557c90a7fa75fde19" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "a1e94ebd955ef9311b1e55cb34ac7f8f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "d28bb21824551bc8ce83b15deeea610f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fc1e10138f503e77e5994049a618a987" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.1.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "125d51bd2bea98fcba3662f08f2f27cf" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31850496, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 10616832 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 29491200 } ], "md5sum": "9e0cd8818ed2d1f9b234893e0dd4903a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "be551132e6ce6659c7c4651e5a32456e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "c752a99e269bdeafae717c84ac2916df" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "ea4cce00773b56c04d9d6547c4dfe64b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "e66624ba48784d866d64c02b62ef1f8c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "05ea1d140d8823a2cd282e166214b407" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31924224, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.10.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 21279744 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31896576 }, { "name": "model.layers.11.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31905792 }, { "name": "model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31915008 } ], "md5sum": "5b33441d7161a72475c6b86cb263a225" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "5af4aeb9c5c82fc9db406ca38737e4d0" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "f4bebd395193b56ae8113c0c8efcde4c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "c5da438a9eb7ed33b6859e4a32121f34" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e440391d9c597de6ee9232d21faed545" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.12.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "308371e04ab12170dcaa59e97661ff76" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "770fbe7c4cc436fcac1d735099c8e394" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "974695a1d4ea81d548ac7547ea344f52" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "3dcf6811bbc321b97165a29fbd981981" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f0f49d462679eb0c55fc85dab9a6680c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.13.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "3635e35c84bfa6b5f0c665546a9a76c4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "1fede7938558fd9e2d5d063928f11509" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "b3215564782d6a49a6f8c0922e47df0a" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e9a0d20a0e90b5cccc20125107bc2579" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1179648 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 1188864 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11805696 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33039360 }, { "name": "model.layers.14.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33048576 }, { "name": "model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33057792 } ], "md5sum": "c895528a5e7a44dc89901071be2a5353" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31850496, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 12976128 } ], "md5sum": "046ea54972b6ef10814ea6a2bf51f773" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "282f916d4271bc4849cf84c64124706e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "17b7287870ed22be2de780739d055bed" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 31850496, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 12976128 } ], "md5sum": "f88d1a5192037463d5aafdd161339cfb" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "4868c9f373189931a3df71588d2873c5" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "9c882e86fa67ced8c515fa4d3df4a74c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "aa6e2d9bc22a4ddd1cb41ed87a7ebc6f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "dbb8db49f555a076087a294a5a56424a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 23639040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.6.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23629824 } ], "md5sum": "a64ca3df72d305d246accbe8a50fd2c2" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "2672900bd06207d568034d3774ea270e" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "37501ff6c91f2d3297f605795fc552e8" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31878144, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10616832 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31850496 }, { "name": "model.layers.7.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31859712 }, { "name": "model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31868928 } ], "md5sum": "34370fe6b9fa1d6a0bccd8a2350f6e13" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "cca45de3c8f5bc6084edbb3b2745c76f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "240f55506b49f8f9225bc95c72d2f4e3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "479861316a1012b680a3ce2ecaf39841" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "303bbea98b39981cb1b569f4a7bdbd22" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.8.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "055b841ef40ac2beaf03442d17f32ae9" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "259073fc5b03146a23a5645f3bf9b942" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "6b20344982979cc956e6e7e7d5625698" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "5ab2684bbd750443acddcb014018881b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ef7fd69a8028e5100af953d630b2b24c" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.9.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "66758291c25300504c1464788389343c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "5fd2531dccf85e7b8a195bee66b6c485" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "77303c8c73c293c2ddcf7e13d684b971" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "b6315c2670f65a7058fa5d90ee389ace" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33076224, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1179648 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 1188864 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11805696 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33039360 }, { "name": "model.layers.15.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33048576 }, { "name": "model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33057792 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33067008 } ], "md5sum": "f3aa08e0c1f1a912e80250fd97a931b0" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "f876c3da70d3298d36478575d1e2dec6" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9ac71de09dac7be0a550577834e54979" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31878144, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10616832 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31850496 }, { "name": "model.layers.16.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31859712 }, { "name": "model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31868928 } ], "md5sum": "74bbba64eea2d25225a8e141b6dc8605" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "4fcb68f0197865e0da3bd5a5054677e2" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "913523891a68e0822e4c520f3126f08c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "406663acd3b5cc9f8bda8f05b16dcb16" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "177f5142c9fc77b4524afffb8a770b86" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.17.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "f7b5ace8da156e3d4c79c29f9ab479f4" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "d9d669d64e38939b1226ab425eab2715" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "249aaf1ba823c93629adc65b5195eb4c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "fb42cdf53d472fa6870768a90b2e3347" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1b110b2736a0a13abe59f9be49b9c36e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.18.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "64fc3d5498dd0585e224e3f787362e71" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "e97fe2f975256473e06716abac854a8a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "583b058fa6ea63e17d5fedd262a20b53" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 24772608, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 1179648 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22413312 } ], "md5sum": "3e7fdcf31880becb794d83c5e45d0a9f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "a9d432229c9f6fb00cb3b6e4825624fa" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "3ee71345500db399d249307246a0fa24" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "75e4bf1e0c6dd66625fea1fb8c953242" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "c951f58a71d96e62621b5c7ee515f5b8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fe96b2e16b5fc06d155fdf28d72654d8" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 31924224, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.19.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 21279744 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31896576 }, { "name": "model.layers.20.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31905792 }, { "name": "model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31915008 } ], "md5sum": "87f4ae324cf337d27ea8d44a4e91c254" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "febf39d31ce44d169f259985d0ffb0f4" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "3e28792bfb792e8114de7f748bfb2c0e" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "506707b235a017d2a2a45ff5a8b5e39e" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5bb034ed2695face6bb364ffff10e5c0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.21.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "afd27488025f214a9c15d1b091ca5c50" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "496288931ea30a9cf7ebd267d72b2dca" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "7b64b614e8a62f125e6732033148ea14" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "540ca6bec424b394e3d318cc9055826a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "82e7271bf09f06c6fa40b4bdd553e836" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.22.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "9bb328b72755182c91bd45f33bc50130" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "d087092e4ef5eabe5147390a9b3b6462" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "02ff70964d13ead402fe6dbf9c7ebd3f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 24772608, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 1179648 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22413312 } ], "md5sum": "e133970c70cbdb36df95c7f3d752006b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "095ee05477e52cfbf34ee9b3f3bd7cfd" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "10a450a231df77480565a7242ffac39f" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "4f70f54a6f7baca4023ed3b02d5755bd" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "95014f5aab38e82e86ac48f9c82e7ad8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "e9ed3806ae3cf22ae2d7ed5fe7a9e804" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "bf478e0d209c89c64e25be8ae4f80a04" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c472f4f970c7dd90f0248d6052126f67" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 31924224, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.2.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 21279744 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31896576 }, { "name": "model.layers.3.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31905792 }, { "name": "model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31915008 } ], "md5sum": "99c920e2a1a302a04e254c87d0dfd1ae" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "d52cee8b45ac58c0aa8d99d84dad3333" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "5a7fcd8c22f9253be9ada2f47c9024c1" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "171d1100056925129cbaad1c52c52c21" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8748e1f671a913d8a01369463d638562" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.4.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "02b0429758f120e02f65c480cec4daed" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "901e9fb797c9f4372bed6c8cb00fbc22" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "808c3d2cba8849b9764fe200b1ba1fa2" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "73e1bb4dcc9782fb6dfd7227a7cc563d" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e4e0259ce7e9cfd993852b9d9628a818" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.5.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "9ae1eb95d80752c14921fcb65cfbd795" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "961e54055e38476f052b6fd954ff1d4e" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33039360, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 1179648 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 20054016 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 22413312 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 31850496 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33030144 } ], "md5sum": "238e7be9e6ac7379e5c653142724c2b4" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "d6ff763d16a58a54eee64194caee10d0" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "610dc5522438e5b0aa30b34f746a27e1" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "9e38c282bff631724eb92a9ef7f2c350" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0e15a90f7c12500c13355dbb644ac899" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 33094656, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.23.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10626048 }, { "name": "model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10635264 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10644480 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10653696 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 }, { "name": "model.layers.24.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21279744 }, { "name": "model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21288960 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21298176 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23657472 } ], "md5sum": "23819cd608ee5129979413b51b005abd" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "0e89141a67103d9d5793af91c6837911" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "73503da1c0b4d8b2242eaf94ec4b3702" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "aa4b680996c670ec8e53cb7a514298af" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1179648 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 1188864 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11805696 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33039360 }, { "name": "model.layers.25.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33048576 }, { "name": "model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33057792 } ], "md5sum": "ca55a32a973349ede1b3b1314d531d7e" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "cbe8685e158c8dab4f6249ffb00efa85" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "bc44ff7a2bfa1714dc74a196f7ebd639" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "777bfdefe85addf0c05a4c3759dd691d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "109d6383bb338dad6e958a5ad8ee70b8" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.26.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "d3f28645a8f230dd4cae8a4620a8fa5c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "d42c33d17cb01e71d4855ff810e65e0d" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "d58f8a23a69fc3bb34cf153b429ee090" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "ca21137bca9135f3ac5e7f7927b22d35" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7de3122cd2f5fec29f1490e957295041" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.27.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "f6d2bb970c72374810d61f2da4430000" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "518f4abdf1758c603f2465ff7ec078be" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 33039360, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 1179648 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 20054016 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 22413312 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 31850496 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33030144 } ], "md5sum": "cae7bb79a8b717a0920d19f294553df8" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "63cb1781b0fafaee32553dcc615b7ad9" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "1884fa696e8543badaff290871ddbe8e" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 31887360, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10616832 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31850496 }, { "name": "model.layers.28.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31859712 }, { "name": "model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31868928 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31878144 } ], "md5sum": "e5dd05da3c45754e665a0ec24a7fa3c8" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "b7fe7716d6a0578e8d6f9048b906adfe" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a9144483fefec9cf6ef724da3a1140ef" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 31878144, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10616832 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31850496 }, { "name": "model.layers.29.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31859712 }, { "name": "model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31868928 } ], "md5sum": "2f49c6b045ca4ed66f2e4201e749bac2" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "b845ea35c5c639536d57840fe3b7f1df" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "c4c4f0bfc7f67bfc4f442883f0ed15fa" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "c9d9e299c573de74e5108d2820adef04" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "01165a6c5042a86625961e0b92c0d30d" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.30.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "a8df28b1cd623302cefc50c203333247" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "4a62587afcabff67f436bed79ceb5e26" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "174fcba66796881cc6454cbe1313426d" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "8a9ce29c3fc016e4eaba8527ec399a0e" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7c84ac0c41313658195f7ce7660e526b" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.31.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "bf0b2a5223ebc4dd31239dae554bfb2f" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "2c96d7c3244d1bf5431e62f5da92bc7c" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5e29d8c47fddfe64326a1d714f80dcfd" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 24772608, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 1179648 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22413312 } ], "md5sum": "4e80d7867681f840e562e6f9848974f5" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "c986ecb756c80d556e6e265e0f0386d2" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "f4f13231b6892d3226432baf55a22b9f" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "558e02ef3b7ffd894a310d9d0a208c70" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "7f3e75e6582ecca4e88d4ca4ad1a7f98" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c48a644d67086abad3fb7e08a52a5648" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 31924224, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.32.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 21279744 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31896576 }, { "name": "model.layers.33.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31905792 }, { "name": "model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31915008 } ], "md5sum": "774fcc2846090fdb2c60e841eab32cb7" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "c86e9493fd420936083c92389c1ccadb" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "6a33b0f71dc4e28cf483c71c510b8253" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "b49548aeaee6482c737e32fa24f8e5f4" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "784e5276bfbc8678b08742c6c2b09555" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.34.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "956260c5a4a668eda36687c827d4751b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "63e8db18c172b957558063d2d40cb8a6" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "8835673478ab5ef402526e3ca5e1ea80" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "7227f44954c82feb5f1c30eab3ef5988" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b6e20eeaafd47c5b19131181166b9797" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.35.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "52fe9e15bf3d2a3543c761ce0b7353a5" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "6d4b0552591774039788b74da87a0b46" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7ab58ecce0c1c16ea1787a5fe90199ee" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 24772608, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 1179648 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22413312 } ], "md5sum": "87ee62065f4c18facdda8d72991ff222" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "602cfbeb7c28f25f7bf2de8e371820ff" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "88ecd578a7231aa44f612848eb179ec5" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "14921a6d973da8953908f184e72921f8" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "83e4278e69e2687bd8b28ba09189c3f7" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b8b7dab4db442af92171948037e396bb" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 31924224, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.36.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 21279744 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31896576 }, { "name": "model.layers.37.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31905792 }, { "name": "model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31915008 } ], "md5sum": "95fd0e45248c425f59f68cc53167e2b0" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "bd224b6576e2ddb003387509a47dd784" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "a4a42ceaf2701638c47d7f1b777b04d9" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "b151ac545a8c891b6ae72678b5296882" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4b4ea490cb1d1fb31c8e32fbeb3b7814" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.38.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "9cec83703f8794626f0426c62ef0b8e9" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "82005d021d728d1bccbb1fda07b02c24" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "4a2b98a7cd6875d8342113e649122950" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "9c3157b109e1dcc63728cafded4b62ac" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e0c606a2d4758f66b0009d2d73b54fd7" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.39.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "58b3ae81d1f698b4d51c445dcce994d0" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "014c109e560922c179b7230a27a93982" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "f9194bc35b26f91066199bdbe7a3fdb9" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "82cd8a35c345c39a9230b31e9016ea17" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1179648 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 1188864 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11805696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33039360 }, { "name": "model.layers.40.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33048576 }, { "name": "model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33057792 } ], "md5sum": "27e024fc3612bedc4bdc7a05f66e053b" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 31850496, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 12976128 } ], "md5sum": "76d462e3e746504923bb925881fdd9ed" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "cc264a409a6a4f1d444f17c7826d0194" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "e0000bf3f54b0f2620e37f0fe409dd1d" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "643407faa98af54bdc676a25e08c24ab" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "c60b5fff12d9ac252317c0f10706ebbc" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 23639040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.41.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23629824 } ], "md5sum": "992a45bd7c3e0b2529b4a9a2f26a8eba" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "46cb8b08fbff7aadac1e87921b093cd9" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1044ee9c2271ff7e8121b548fd1b1e5a" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 31878144, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10616832 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31850496 }, { "name": "model.layers.42.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31859712 }, { "name": "model.layers.42.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 31868928 } ], "md5sum": "f0eeca89fc0fd136681d954ba6db8c67" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "04f087ee058228343099d5cf45a64685" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "63b3a8018f48fd3a7c9d26735abf4019" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "9a03582d04c436063327bd6b7bce31af" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "72b32e484f9828ead9f8d7d5e615a646" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 25989120, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2359296 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 11796480 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 12976128 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 12985344 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23602176 }, { "name": "model.layers.43.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23611392 }, { "name": "model.layers.43.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23620608 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23629824 } ], "md5sum": "1290a794873a2717913de36d5d424977" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "a9e0b26c2a33481695ca153a6badf9a1" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "711e5cceadd6d83ef54a6f56bc4fb905" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "ea0cb8ae38045d4b24892cd0ae4bd0d4" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e1a9a2ae3c55ec14b06fa33994aec295" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 33067008, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.44.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.44.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21270528 }, { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23629824 } ], "md5sum": "595aaf1de88435e412d90f66f60efce7" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 169869312, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 73728, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 169869312, "byteOffset": 0 } ], "md5sum": "6a5dcdc29c3b23928d082f7e259b298d" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 8192, 576 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bb8cd376a4366205bde2900caad9acee" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 24772608, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 73728, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 1179648 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 8192, 144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22413312 } ], "md5sum": "780d6e42db0268e5fb5d050a36b3c7be" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 84934656, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 4608, 4608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 84934656, "byteOffset": 0 } ], "md5sum": "47e60cdc3ed8692f64dace821d032893" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 21279744, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 4608, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 4608, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9437184 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 10616832 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 4608, 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10626048 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21242880 }, { "name": "model.layers.45.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21252096 }, { "name": "model.layers.45.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21261312 }, { "name": "model.norm.weight", "shape": [ 4608 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 21270528 } ], "md5sum": "7ff27e2810c0fc95f423b15dbec78bea" } ] }