{ "metadata": { "ParamSize": 805, "ParamBytes": 31776318464.0, "BitsPerParam": 3.4877930573818188 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "e6120f64ac78ad5364e91df7564514b1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "75233f1548d2fa6bcdac67dd9a81f6ea" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "1e00f8c4a5217b03e44610b9e400a714" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "94f7de347cd3760c9e78eb935a022c61" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "0b0619f2bd6202265b2825384235166e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "3952713f15634462b01524b3d1bdf8a5" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0bbf55102342508ba104d3658873368b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c3b2abb5b57351d25d15bb617545e80a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0fab6b24acdae51d4bdf8ca6a6547bed" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "769d1498e6b5c823de150ee200b9a9e3" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b7c4b4931e8a392f94f0d68d7b839918" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fe207729164f763fb27991dfcc02745d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c309c484d6a630085852b88bf5d9855f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "46efc546ec5b9bdd066b5ba95722a2ed" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 26910720, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 16384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11780096 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15978496 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19337216 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23535616 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26894336 } ], "md5sum": "731dc5d0ff2bf48c1f548d6d42dd798d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f3487b6dadf1ebfff628b8d88652441a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e6a858ea341c4cccdc4556e4d97018ae" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bd1074581a41fcac73d7017795b0a661" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fb85f88a968198612fa7d3db1d7e5738" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eb8fa8f5ffad5ce79c2c9ddbc8ae941e" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f161f27ec9e1f3a61a4d0c278e2056dd" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "084a362b8455c8ede26b0df091ae0cb2" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "835701cc7b186ff72bd5e37e4a02ad8e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "231447fbe6dfca2e7b33f61f66a8c08c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "eecac60661434b8a985bb7962df4eb43" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "4f64a7a498a7003d470babd9cc79b3c0" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "709a2257a4d996a414538cb8cef62037" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "f0cd618609af606329dd3c93de611204" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "968fd8fa2e2d42e21f57b1466297c79e" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "7cac97018d8b0ce52220fcd1cdd7022b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1d79c40e6469c5fec4732ce79023e865" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2e0298a9d5941026bd45180ba406190e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3b50cfc13a9ca062ffbe0321f93e74c0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "addad4924a8196398bb566341db8c546" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3373343cedf45dbf95b79a7988275524" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2ad570f80cd5ab22133663da40905460" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6883d92aaa8de8738072abbd40c880aa" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "8cd3f3d565a26d1e7dc6c58406196a63" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "44af8b0b36e8939d1db9d8e97b649917" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "86549a52e500454aa9e9e612633e293a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2025b392f124c68222498416ee666338" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c97a6bb06e37f977ef56a43af654358d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "efdc75970b2c80380cd49a98686f31e3" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "6c0f7fe27d242868b29feaa14d39ed0b" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5d606a3ab632f9d9d1d53e0295100f8c" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "168e7000ed48e2bc9197f04036f15dfd" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "cc8485da0a8a7bf77f6ef26b2a904fa3" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b448c70fd045e1523c3bc64f670d8cd1" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9606ea9dcf95badca2c7fa1bbd47496d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "25061b202aa41469c75c69f4ad2d4157" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "6eafb575d6c076b92d3f53ec67b9daa3" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c3f982098cb837a48ddb7f7b910ebe90" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "661523c83792078ab8005042145c1152" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f16d98a1d854d123db0b0f2e4424759e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cd82b0b526ff3a45adfbdfda4e03ed45" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2dcea643c52fdd8b9982d1ff3a984901" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b45e28299a030dbad29fe27cac0ceea4" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "70c376581c0553a43966f3d0920689fa" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "be0b923e91c7b59ac35399f62c47d4e4" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7cfdb434cdec00636c8852b552844a5a" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7236bf88b5568eadf03352b6e48f4a7c" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "2ee849b221bffbb109027254c886e14e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "fb0a4440399b0fd39db400cb4b56c5c0" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d47b4c3adb627812d2a64294c6e5ec44" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "03edbb3369689b4a65a6d2649330149e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3631516c6548ed1ac358a6a7e63c4d02" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6a42649faa24f21d54f691972dc85f5c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c002c3f30e5ab50deaa73c701ee941eb" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "90d5a2b2d2f00473b8926a5a9ac063ad" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b9bef15a2d903f80f5c80f922e48c12a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "316c494616148ee3ebdd17a21d4ea7c8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a8a4a9ad6f742931595043747212b7e1" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ce0c9b3068aae4bdd826d8f003705e48" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c1aa34d9199b1c58da457c66e3de5340" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7821cbf6bb0637f9d0fa2562db5f6a33" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "495262925803307d9fc404ac648af840" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "3a3aa03b0646fb9b9246af9a070e5eb7" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7ac944401a5a2b2a3a70cd0bc9bdfe1e" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "323bf62aecc096e75c6e05c0111d6888" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "869534004df03868de1347304f64925d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "bc1ec2cccb5f92c4a1a0ed4e3929cd30" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "32f0a402f561abc84dc8057853b514b5" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b16023d2827ab69896de72a731700871" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "552cd96fc0dbe4da32afd11ad9fbc055" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8aefb8641fc8aee710a582c1956485be" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "aa80fa199096134172f07b7958820c89" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b046383bba2d4fd68b5aa4b5b046aa4d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "71a2f9fdcad13240c2ad4486ae32cb1a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "037ab603701fbde61e63e3ab4c7e92fa" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "15ab6f65f5ff3859b5949d11f19e2005" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1aeeceb78f43174a7195b33c9630343d" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "943b5bc506f8514739e0ba2894393803" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "18dab4ce0dec6b92ba71d0ecda4d2f53" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "081ed3c3bf5cf296359febac138af0ba" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "37fd021e2a1c97cf33bc9dc56b5e300b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3c73ce2d010e4e158814f31debf86bf7" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "32d1f51e8793173372f5072ab79e0079" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1b9c9682b06c75e4548ddb97e5d3ef6e" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b5deb3e5e709337cd3639cfb75a47c5c" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "28d6aa59a5818cf70616c4630a8e7e24" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8fe1f825405085d5ea665b92f008cc41" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "f93bc3200cdbb4d9afbda675a33b9b58" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5ee1f3138c079e56c4459686aa9790c7" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "25cb0fc81f5b1771b3f4ab34722e8196" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "01ee0dea54da63b84d3d9236c8a97c66" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "33b1413bbfb974bbe7073f16b5d4a755" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "f7bce52584de167bcb7b1480bbae8556" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "aff465abd32b8444f7d06a4a41efc2dd" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "53819388d339595cd118409152985fc3" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "eaef09f444c1220361b026659927d776" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9f762abcab103afd44bd75a1fc7ce925" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "04b1afb96b2a4fef4aca53f570380499" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0907685e4b50310b4ffabbe198249183" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "aaddd7472c519b625534e6a321b4598b" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e0fe84de95531dc6aba51a470495cba6" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5e74b4f233eda49a266219742bd140e5" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b6eb01a0e2c40724179a3f85d98eb5d3" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "25b575c47a4fbec42de5a2d067fc84fb" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "190cc4731a8b2b5c313b853d6cdbe43c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "662fe03b001a9701849038b5f1006a54" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6a2e2114d9d6f0f389b7a7264c731bd9" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d8d718cbe9a96d21d7f43e5af7f755e8" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "83794a554e704e21c43024dd54b3d6d7" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c56ebbad11300991e52e76885a8521e5" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9def14a7a34a2501fc5e60743d8eb473" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1060998fe6882d1c3a4228734bbc211b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5e5d81d88156705cf4c87816d3c236bf" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5fe40351874054874a9229d5ae113879" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "948a82310ad599128922b25aad1efa74" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "12972dd1af507b15a790596699f5979e" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9b2490c27890cc766c0fc2edd8e0799a" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f9638a57b11544ecccbaa1f9ca25384e" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8216e2c216586b688453d338129eaf35" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ff6caf2462cb4226876ed116e6387763" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9e3f674af4029b557a8cf079b75252e6" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "04329925928cfc3b4783b079ab05818c" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e981d04b513c6858e7607304e263549c" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f916ef2f211b901953b92da1e74fe1bf" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "dd2e6aac95cd07b17b060311e57be01f" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b494d8bdf340fcadadc35a74f02652b6" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "26adf714cf190de7eadda9e78a5748b7" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "576326d6642b9e134b3262c62eaf2cc5" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1fb251957d3db310355f18b933a8f6f0" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4684679d6932b812c3a6828337a36348" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1c8f4431cddb318da500a8890584d7c8" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5e1926b711a25605e80e74ea8aa1b513" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b61cb2dfdc5b65c57706b543a73eb9bb" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "3f3913392787a3e219a03664af7cd784" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9581c0ecbcb0d6be9127f24f30ca5b9c" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "d4112d90461824d8c39583628db07250" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1eedd28c011b0226d1f314fdc5f4a6f7" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0ec6f07840231ac7617b21aacab34805" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "20955b58fe2f16cf2144a7ddf222d074" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "24204ca103c14175725c5b87382e2387" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "fc5d50595e7f71ac768d016be6882da8" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "5c6cec69162057fcc24ac2cfd9b2e1f5" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4b9cac74a5b8a4cacb26f1ebf94f2f02" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7698165de68dc13eba4436c5eb075c3d" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2f213461c19d0e67cc2bd9d5fc4e39ba" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d0d33928e86b0d8594971491d00524a9" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f0ade861093eb3b39c4e2c4d38cd3e3c" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "59f30c34a9b5e7df75674edafee6a9f3" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8d54992b4b7c22f87f69c92b805eef19" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5137753d315cdadc699efb28746729eb" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "6082cb98a179ccc9d863ab317adebd18" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "604da6d1c25bf1d9377dacb76358808d" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "028a3cb0e1c91a9eb89302e3c3f3bbe9" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "369040d75cda2202f91823cb662f5ed9" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "094db311d7e5c71fbd435c8d1ab39342" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0c3d87938cd2eac851391219ada58ddf" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6638ddaee783e8ff4f130fb5c6dd39de" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "8b727f31ea9b98d8d364ce71feb86878" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7bc45836d1b708e8e733c70977f08b1f" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "56a7130310d20b740ce8876e55e864e0" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6893dfa24ff1d5f4b676c4b0d5708fed" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3ee70722c502c6f8e54c13d4d50abcf6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4fb01674831aff84da3e7c4686b8348f" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "72136f3ca5b124b038e2b0a3d35b5be0" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2a9a073cfce4c7d1c3a6ec0374d0e183" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "703f3cf29e158970db4b5658bf66265e" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "d5cba725cc7a74847d68f364719a5a9e" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "cebc39cd26c3afc0c2e98b8d2f5eee98" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5ef7b4dd3f60639287bbffc8275ad2b0" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "141911e665c4a3ba4c9b7c76de2f3c31" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "31903f6632a324ff946bdfc1caf31686" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "fc222d9f1c218f1242b29a3cf545f049" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "252e3f6e9d081d637e2b29b13a1749c0" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "a4d8293c95a119598da865588df7df0e" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "15748c7147c1d1262c9a917253d9a55d" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "144acadf4bc75223c97cae1c4b5a1808" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "80bac6c071abf5171bcbd24ea85ee7f2" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8bc1cb8a461dcfdab225e228b3fda405" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e654f426f28f457b0f20f4fb6e22960b" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "82685bfa1281bfddd120363098f76b85" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "32dfdcb7b69731edbafd19f22f9f813a" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e153cf79da8a4a4da185e059ab0a5947" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "bf5c1e108c62ecfde7c025bf7c962850" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "ddf6e72283284fb8ead873ff2ff5556a" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "cae3f584863bed4767e79a7d7423f1ad" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "213a71c4dd0a3953e3a2cd93484e80e7" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "97cc377a4f2f74f65af6484977b9f198" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "95a43be584b4e97676122e5efa715b2c" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d7fa43235c9db41980f099648123cd4f" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4573b90884e983ff1eebdb884d42a9e9" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1cf8d0c6a6612655200545020816988c" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "69ecae320de1b8af56eba92a6b6cd917" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1b9d29ec2c8eda5143ccd43035c900e0" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7ee287c80d1bf82a7c6a62407d80ff3a" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "282100e010b3e5647522d2e582a9f5d3" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0e7cc7ebbc042529d8fb6e33e17be973" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "2da02a12aed8c2c3e860e298c1f39bf8" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "db5093fd05a79b8ee8692a1ba4475449" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "eed7ffa9c57dc0d59a03765aa5e5b205" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "a2cc5b9de10fcfc765bda89cdecff0fd" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "475c6583792d3ca197a39a66385719d2" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "90383b0b2aaf40355c2dd853aa36bee5" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ac9b29ad28921f7f6bdf80dedcf70f17" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5ed402d2d9d6be303d1b4d3a9a108270" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 26910720, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26894336 } ], "md5sum": "2923562f1135cd8214b9f2b5fb2ca3fb" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "73e085965c2d4ce0e1fa98884e18c47f" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c5cf644e70d6eb4413a6b09ddbaef744" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1935715d1f75aa0e4025420d562d031a" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "35498b4beae0f531ea157a99e67a8d0d" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d76f3a6847397b133a3cb304f9d23663" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d12aca8c1cc17c4662cc90ad60078cef" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "855928b6c38663474aa48c192956ad98" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8c577923462e1ae1e01675dbc96777e5" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "dcc02d8f66ea9cf84e978dd775763938" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "877942889d693882753cf45d4d7f2bfa" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "db5fd9b513fdbcea737c6ca5ce284427" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b43146e3fa5487b7fa77457f76a1d786" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "d3c24217a14bc9388de9a076e841c662" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "be26d97657908bd24cf89cf3ef0ca670" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "32a37d6b8e407f8cdf817668741c5290" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e8764cc59a0b9168e7c8176b421939f5" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "aaaa03b52c41bfd5d06d11ceca41b525" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "79858c480199811d1668ce4a59c02120" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a9fa70ddcf33b2101e403fce9cb21cb5" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2387fad9ed07d3cacf40bb2036bdc35a" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2bc8c2681c22eddea06e819a773cf6a7" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "35be5ea60acd8f81de2c4f0e41a6b443" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "0fc6875390b2af58fd3cbc7962c4ec61" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "34b238eec29f18f1d98b494bf20ce81d" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "eec569fb9aa7a6071c39619af02d887a" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9e78a85a1fa108702b0eec63e003fc07" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2afe4dbb4ed9d7b5e5798ccb07ffa14b" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "168a47af315bec190c7c72e19215f47e" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d572f6685c4452323326495210028fd8" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "541e4408987132a4f71167b815df43ac" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3982a475dd5845c8b43f00b49557e8c0" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "e04e5a3d6bd4632c560c9afecdb4c446" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6bee69d487d977f2c40f0ebf5a155cab" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "030bd5dba10b2809918a4b2a8582bb80" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6f6be5ec403883184cd1016e14c21af8" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "59809fe08549547ff15f1bd8ea4bd8a2" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e1d75dcdaef7da422bfd3f8af24049ca" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c294a1c2bf7bb40bff7c916902f0c0cd" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "86ef2048273a1f09fee41596bbf7f223" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "43d2541a6ca416721922afd7b3da86d2" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "073edf7be96eece503db513d45dab70c" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "80328702ed46c70005b3b02ed794a640" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bfe6eccc58ca402d8c5b9e7f90e20661" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e278d5aa60f8c287b0004037da078ef7" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7fcc96c2ef7840f8e1e33ad20add3994" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4e1ff65b22ece4a7847d64523eddd3f5" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "51f8db3d3c7bf8c68edf0f5cbfed4381" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "ba432571a9d2205c41230eaf0a2e5902" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "15cdcdff54a007c2a18c05baa63935cc" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1649aac5497716af052d6c9bdd3389c8" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "767fb5ac17bdbbd94ba61c3924fd0210" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "31630ec909599baa8338c66309af6bdb" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "0fb80cfe07b936b1c8f82ea1ab08ccfb" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c6a8b398da62db3b9001270468a8ea81" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f7087eec04fd39be355d46472d31eced" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "aca047ea28e4af30e99cd270c6f6da46" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "c7dec6fa0eb5934f165c55f64a5db2f6" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "19d67854ba4e7e37cd4b3ee8b8bc7208" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a15599a4f2cda7220ac9e408d03f1f75" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ff01f5b628d5c5bc665a5f699084a18e" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3ad478363372a03aa23da59a71ff5eeb" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d5b21e53ebeb026ff1a911d0b84bbf37" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "125bb0cb08c8fc2280142c79132b2c3f" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "5f0a12964a6d53cf87f1a3c791e1b8e4" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9ac6fca1f3a640bd6f6b47de1e81b623" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "320f67bbf1bc436d554d03461e9260a4" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4cf42d1aab7c4bcdc7ed2c3533762fee" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "51b72da16fc3746f267b7c107116ed0d" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6732f008ff7245288df814bf87980979" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a0ee7983420d51c9f6c3ee2b403ead1a" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "d4333dead02c2b760e2f247bbe4129fd" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2414163b2b659001b231b7d12eeee74c" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "5bb0f39f8437859dc0a5bcb7b32226fc" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "a706c6a3faa8fa0d082cb0539d582a5d" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f4173a0c130368e26f9c16f96074f116" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "37cb13d8c6958ff569b9aff36ddfe736" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e836ac69e1b12cce247460ec2eb6f5ee" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "f4d2f46f8e682120ae03acd938d101c6" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c47fb6bc33dfe94b3edb2d54143d9582" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "fc1c6bd450cfe61c7c249e078e1e16af" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a641f6573f7a95a2d30f2be167aae3f2" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f32e314a32aac9fcc8f10512c42061fd" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e07bedc49e23b9ecf38372020a3389a5" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c14b5791c4127c05042c26209be7019c" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "c706a7ebc7cb43d6f1e1a6bee03161fd" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "375e259b2c462d733eb7a76758f4fe2e" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a8dfd56e335de128a83cf3b504a0fa88" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5133130107d9b74e6844da34a5ecb651" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cdc9ee4e1d791f051d3c4893d896cf12" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c93bb2dfaefdfdd89c9c6944b5da3c3a" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "94b2ddbed9480609c701110bf816f855" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c493e974a111054b9118b885fec06676" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bd258b77c3184cf4fe0c41885f17f38a" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8de59bce7dd1d60c7600e43da304c39b" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d4d3833292879ac64657accc7a4bb054" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "8ffeed8816b174885f4532cc357878cf" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b7e0f41a2b00ec35fb4b03e63dca8631" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "926138273d612e2bf483e1c0ecf973e3" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "03d6dce4be0fa2ce04eb5e1bba344747" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "08be3df959cd284f8001250081b4b302" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c1f3199a3b8f19b05aa6f0ddf170f1d0" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2381de75488be6bafb4e680fc229a6e3" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "c99dfcb0214cd7316bfe05d6820ea888" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "dd4a16c7ffbb7e64469e6d3c1d12e515" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c1b60e1b612c554f2840a73dd05d9d1d" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ccd31a854ca1b470835df087aef1e979" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3749894f9a2a945239fe51d8cee9ac77" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f550e7021c85ff46d1ca4adf381b701a" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0da60a5bce4068b795f6afc4679ad4c6" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "070ae8cc2e8ba26c35d39e90590bf8c3" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "96c287030bd5502c3de75e7757800b70" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "dfebc96233e9d04537fb2fc34c651615" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c00ecee0bfcf5a8c853e7b6ecf2a046b" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "46ba3ddf462361bbe2a1a9433385b4b9" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ca9119eed2e1aa6f52e1fc5626d03145" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e40a032a69624c505e010f2a4c83d18c" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0ba4be6c3ac4b544e052ca15dd763757" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8cdacdd4ad8d33717fc0537c41c19eef" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "35c0a2333bf088ca4c29956e46774e67" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "91a691e76bf0c71efbeeebd6910d19b8" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2e641323ad5cc94264150c89f642e58c" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4a81033530e51a0b5f9a3e426928687c" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cf863d623022ffefec1c6bddca30ce77" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d1ba400ef0181ec1b2e80581df3fb812" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "502a5e1bb7c08fd55dd7f2010cb396ee" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7ff51223726f9911051b98fd47f137ed" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "b24126ca7fda83a593c6d28b013babd2" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6db9595009ed587b4af9c7da6016dbe1" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "42e19a5583b39909b9d9f8ca345d6003" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "117edd9070da4d13c3a7cf1be7c7e6b1" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1737fe744ee4de337c8ed4ebee040774" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8eff5819ca657c50303bab3395242ee5" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1caa023793ea87ad027871bd30792685" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5830ab066f2ed81166bc1613837faf19" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "4e29d615b45b316944e02dc97524033d" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "e648e5d1a27f4804c170569a8caa4546" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "56bec52e6ffd6494c3253271d4739dcf" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2b4da2e2ee0e1701d2343ea99b42e2d0" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c85f48c123059ec911aa0d583b54741e" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "911d47bd485fbf0a5c5d3593f51f04d5" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "11734110be2da463e1efddcbfa1c6e40" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "df8e4af381b4c6637ed4c1f5263b9f39" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "6f1f6fc46e7f954ec1b7ad2cc9577230" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7e3c5eaaf58dd59ebef1c278085de736" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1e71000cbd9630ae0ee8e9d46a48699e" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "85e32524084cc97e2d80a6cb8e4641b4" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3041618627ef9706e7920f872aa6e4d9" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ae26c4621d5d0e27c74aec2c3a8a607b" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a815b1a711411cf61060c8c5a40eb760" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "43c9ed1ed0e295b0bac0edfa65d17224" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "65a302d4830bdb67db5a2e70dfc11e18" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "15650aa0b52ff03a687a6c8a15952904" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "d70d34eabff5776b97fe02fe3eea51a5" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c644be31f0e929c678072a29078c411b" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2a8dc50780e430f81ca1a72dcb7f8d3e" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "bafc0195482906681559412959a1eeea" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "07dcba6901b540a4771a5c223cef09a0" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "135abacb8b41d64c7b53ac7d4027382f" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "30303330b7af5cf47e9f2f1bbceef334" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "71a125c455e9ad8b251db9802e5c1700" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "cf23ec7764a6ce8c1e6fbf14bba8c9d0" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9959d32bae0219ed0c298d7316445370" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ff368e9be87456ec442eb49c0a9faa87" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "6bbdcf6b2543c30bb3b9a9dd81ce7101" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2e18d003684597fc40a3cc2a16e44c37" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ba924114fab4c90e4a55c7ab867f515b" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "07c7b29a82a8eb38b9a914da8de70f54" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bc82c949c86d3743e083d3476855f035" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "69a8718dc059981133f9be2d71ca0576" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "eab41f18ccd95cda77835231ad4725f1" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "afedcdc2c9616bb75f5542e125dde6d8" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "aea28d0b8c8d43cfb56206c9b1a4dc2a" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ee4226fb0e026a5e8914a6effc8d3e19" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4ead71c97d57e441c3dd1b8609db5992" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "3d42ff63c0efea118bb0938548a90b0f" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "37eef046669e8185cabdfdd2fa42200e" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "98bf54339ecd14f46e6117d49f993211" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1f83d62529dfc1e87f489411214d13af" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "95e4444297e2d4d11c45f354b8ec502d" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9b7c4e762c761047fe7539a5bf6d9db2" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c683d2f81a60a26bbc240b82830e8e37" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "05385db026e47b4cc6ded2029a9f7241" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ead216c53a9494c9ad19f643ce0df4ba" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "057df2ee376023ab76c9ca6b283fdb3f" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d6da43e84bc12b1fb8a578b75a6b4cff" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "06c3f2e61355b7eed52ea1cce48e5033" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "400504853134644eb09ef6c376443475" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "522e359766ce5bd159fd9f87aac7ab26" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3919fdc00aae5ccc06159e3b5bd702af" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ee85773f673aacdef2e81aad51a1a480" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "a884398d1a0d16dfda107cfe860ee208" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "739f0742c52f62854dde1bbb7b5f289c" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "267bbac4b1dbb11a6fd75d0fb0f9d684" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ff79b40f3cc1e3e4a45f37db69f79683" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ea449fae933bb2f6e073c4fff9052b4e" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b912f7caa9da2d9a3c1b3b170215e07a" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a3714523341997bbf361da50b0792c85" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "d5b8d04717fdb5b3545078a03e5f498d" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8f9cb69d58eff80fb8c8920cf302a29d" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "864f2091b09e056e7d565f115ce3b79b" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "579d945b2ca8694dcc9f9f1bcc239a46" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a896aca08b0cdb4760415305b503cb7e" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "759f055cc224bf753822c3e370e8a9c5" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c62bd237c392e32a7326e4b6a54fdfca" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "76fde13abb5eb6ee6cc482d863854d71" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "937daa4e63bf1b9b845ef4b95a486cd2" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1b205f20c1deeedbad281083f1726f59" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f090ea2470c6528e0a2dc8acd4b8a1e8" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8205fea603712eb12e2befe8c7c1c948" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1aaa0b6c76c74da08b1ae145a37a867e" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "eb9ef0dca894cdac9a92e1374f59955b" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fa6ce393cd4a59bd63361e66785bb5bd" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5d4cab5cac17c391b3904b08575eea53" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "731e9acd4fd4f3dffffc93b15b2b5a76" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "87de7492014a450c7a8a2ae0d18071f7" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d710f1826fcd34c1be81f2ec6e9a6cab" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f90af15611a6456fb9981bcbf0e24aae" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "39b4d7424827a3de8a14c33e1e5a9f5f" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b1580bf91cf75d9aa16dcd21297f3435" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4bf9e0bf8a17f4eae9f0178696519fe4" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3ea16630003c2499aef10538119cfac6" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "6a2843e0c5bcdf102d92c0830b75ea07" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "819dd7e22c382164449f7e2567419d53" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "70185d329360a7c1ccf5220f651645e6" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "bf929702ed9e41351ee7306c104814f0" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "450664d4b7bf9cfeb24c4ca65c1cff89" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2236548c01697500ee18aff72775a742" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "17932d64e39e6f088c25955dfdee3876" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bce5343923c9ad6444be3ef21df09c78" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ccc94bc5aca93a86b8ad772f453dcc24" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "5fc1da3d9659cdf75e9c383f07932a20" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "fc98ee4778c7a8e2f6d91abf754a40b7" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f06c2b1fd7537dab5f026fadf29bd7ff" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ed5698818d19155962fefaec95743d25" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ed30b6ba969b741f21a1a117750a5041" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a81c96c4b900767ad0604c952bf2637d" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "f2c8f09d30db51c32ff49ddb96fa82d3" } ] }