{ "metadata": { "ParamSize": 605, "ParamBytes": 20333316224.0, "BitsPerParam": 5.000668485990747 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 106499328, "records": [ { "name": "lm_head.q_weight", "shape": [ 32001, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106499328, "byteOffset": 0 } ], "md5sum": "8c65a5d09f5ee7aa3fae92ee870ddd63" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "69f8a10ba032e4d278b2704ad52c16dd" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "e9552c125ba4126f0bc010a2ca7c4e7a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 20780448, "records": [ { "name": "lm_head.q_scale", "shape": [ 32001, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312416, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 13312416 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 13325728 } ], "md5sum": "292c28531f1125874ec6ca597f902cae" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 106499328, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32001, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106499328, "byteOffset": 0 } ], "md5sum": "a5ef1c9c9c633df60319d82d0869d02f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "9a8e4291cc644f631d9b63d195225ac5" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 28261792, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.norm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14922752 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32001, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312416, "byteOffset": 14936064 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 28248480 } ], "md5sum": "de4ef00a0931f9942bc3568d15901376" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "158231ddcea20f7ef189e3692b26b17a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "2fc80a755a55aff43493276119d908e7" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "e45da9213a8e86ef5afbdca61f98bf4c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "7471b1aac3a426995c44e091e1a93621" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "71ef66d92b609c509015a9a0d5e5f2ca" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "f823ba6bc7fb24c7f8c9c1acf18d81b5" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "61228723193e016929dfd996570fd4df" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "aecc628a7e4705b0e835f8a1e3b245b6" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "31edbaffe2188e3120b135bd97fe3da9" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "d9b2c1d15d23183e60847b38bad77016" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "ec16dc815e4c01048e4b6721fa80942d" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "4c0354fa2e56842173b8109bd40f8241" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "0c77afdb7f19cfc1f2a9ee1d03729e75" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "394b8881a1f8c4a62fe1cba5ac6ac575" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "8a82938ead85cf3bddf0f9dbea3ea5bf" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "1b4a6dc4105f361f99fbd52fe0186199" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "1d997de71f89645b0f93bfb4e310e41d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "6459d4bedeb865079c64024763e2acde" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "302339d18191dfe0b2636ea840dd288e" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "8478de19a503697c94a5a26c67f55770" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "88bbd18fd15551a51450fdce909c72ac" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "3d215e11bbf99a7cfd429b5a2d7b8db2" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "893c9f0884144c8c20f36ac3abf90c8e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 30697472, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 8306688 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 8320000 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 15774720 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 30684160 } ], "md5sum": "4cff5eba48e362150396bac5ff58502e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "f4d6eeacd314a908b142d14f53ba3e7e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33240064, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 8306688 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30457856 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33226752 } ], "md5sum": "3863d5a922deab10e244b38194ae2ad2" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "79c3f49f9a6fd0881c6f13fb852c35ed" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "210eb47e3c938644d4dc7fbc2997382f" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "43c44365a38eb64df25701a770876136" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "4414e88bd54643c0ba0545c87b2fcabf" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "e03786123caf2cc730ab1520eec04b5c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "07da47a7df6b2691539810e16ad617d9" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "08c3933e4d251b05d54289639e6ddfd0" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "6c46bb4c389fba7760713fecb78a189e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "68e43ea7f76e42edfc44e0f6c7b806fb" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "f25043c6767d08950a00a3cb039dc003" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "96cc0529dd1fe385059cd386feb3943d" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33240064, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 8306688 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30457856 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33226752 } ], "md5sum": "ead2f37b768b2f2c0a836f13defe87b6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "0cfdc2048d04d8c567f30c98161595ce" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "5b8a7ac8761deae7f602cc67e3b2b0c1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22390784, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14922752 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 14936064 } ], "md5sum": "7fa9ff09b4ef5368effd39e67e2acd06" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "2dfceed2fd9009587da8b3e4a633b18c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "5f196a8acaf4c54d2e8d9e734c48445e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "85e26cd346cfcd0018f2f700712b82f8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "0e7d3d3b18e6ad1920038be8441b2e4c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "fa8b683e584ab61559a8a72c5bf22fd8" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "943a2b7304e2c19c086b5ba7dcc6d644" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "e20f97d43bde958be836044008e8dbac" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 22390784, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14922752 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 14936064 } ], "md5sum": "286561e0ba877cf6b43473612f865d25" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "f3f4affba25e5b9a68efb44fc1dca071" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "c74bf05fe4d74c03c40eb0f965df6ac5" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "5e41a9300cefbb0f776a115bd01a218f" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "eadf12923adf301b69a432362706c357" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "3503e7c881912913ce615caced2fec30" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "a11c39afd557204b0a228133fd8c12cd" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "2ea3032ebc9e756671c463f6698a473c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "be1c7b4715a627fdf4ee20d2ffaf340b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "af206cdf03ba015808f7b94aa5e7162c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "b16718a71b553f84fc005c72fe53f033" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "9e062b3ad537deb68bb16c1decccbbf5" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "593d4e2a951562b188b97b1fbbde616d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "52aaac46e6948d3d94e65989bf5482b6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "5f736daf7d6ebfe5f5a1c3baef675336" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "767cf2055ce8f667f04e51c0bda8af54" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "f277a716c85515f1e49bf086de57ca5a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "0a4770c6a96938492fccd45c0e22cdba" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "db99e083fdeac1fdca8ef8405f5fdbc3" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "bf12c2abc03fd6afce4e66b35b6116f6" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "68198cbdbc484ed2f18562a6e17bc0f7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "c00577808df188744473e7a3409c154f" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "baf6191975e1f63cb138e8ae82dbad2d" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "9f81e4a6e06cb91a3c79667fe4d5e816" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "e71206ac6f3fb3b8d64b900a02491ad2" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "30cff0cf5f38e39f70616cfc333ff5ce" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "eec0d29a3613dd961ff853186a6b70b9" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "b1ab1409d61f28eeb5466a55d1509e85" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "d2c24bd2861f380e6531b57f8fd3d728" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "938751f6c0b0325f9376145c8b3b0fff" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "2773aca993768ac0538f05a87b29a661" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "7b4b0ccb8601551d13c1c192ac6ef07e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "ec3d21c5b64d325b43e11625440ba20c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "b398d9734f8f71f67c58daf57decb027" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "32595b39a6a7e84ff13508566737ed9d" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "0a46c4e5ef3bd2cbc06bb91167097f9b" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "158bb199157c14caed50149bef6839c6" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "b683bb0a0849bbbd4b68ac8ca7c41cec" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "ce90e6c569eb9652712dac9c27fce851" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25998336, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 } ], "md5sum": "a6ca1800022837715111111eae012495" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "9803937ae6efd676a905ff8560ca27e0" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "4b5f044f25a3e1d4a382ecafd993ff1d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "332a5be7a4e72e5fbd558b59bc7f658e" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "5b997b775eab0034dcc4dd509aed7ffc" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33479680, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14909440 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23216128 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25985024 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 25998336 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33466368 } ], "md5sum": "3161422fc85b1d43645c6308079f244b" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "c7b7586a568779eb065a84c419f65638" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "d41d93c86034a22fc8c74f3c00e87742" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "8d5783d523d94dd98034e93bd627889c" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "a5439c6188ef6ccf053e26e294358dc3" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "7a9717a9f99e0a573af064b2ddd85ae3" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "304b2e05c67c7eb18f7d71deac091373" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "49de694fccd762fc5ff86cf31cf32156" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "b6bd102727b9bd5e06f861258728ecfb" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "bde2a2bb8bf7f597a049134d6a0788a0" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "16cbd433094fef7c0376d4f47c048970" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "72f73d2e0e9fa8277e72bf2bb8ebb24c" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "9381a59995a208e85cea8aa519f9a9e5" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "957b5b68fa6e949d141915b6e6dd44fc" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "905d3d35326bd7b86a95702535d0b40e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "022014115f7b5bd2caac75fdca0f97f8" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "23417a4cf9da2c010d4edc5b1078bc63" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "d238ef5d5fa38bca0e460d6d169897d8" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "83e200de88baaeecf1653b8440f45ebc" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "0149e222092152c84d3e3523bcdd7b2e" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "04274a88ccb878c7d51fcba8689722f8" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "c210c0a815335496072fac2ee42e93e3" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "29ea093f9dac5228139bd61456344503" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "ee55a100b6fa5ef5c571b6eb04833865" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "a35122c308176b959d179655b2903c93" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33479680, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 8306688 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 8320000 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 15774720 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 30684160 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30697472 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33466368 } ], "md5sum": "1ef525b0996d4ed90cde4ab80163803f" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "e05b4a012f89cac8656a9f22227e03f6" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "56f805aaf93196be7429ff897c2a444c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "b1cb6deb3f3007c931aa432ac1250750" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "09407ea08f55228b6c254c9848435c45" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "ab531f9afe70c94aa09c6521449a4529" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "2f48dfc4c7982e6910e6c688b7a0ed1f" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "35e9d0dbc4f18d4f2578ff6a62bb7061" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "be52db96d6c92b06a344143c229d4961" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "8fcf6da47c51c7a8b0fba311fae496c5" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "b29e273a5b761f3e2529bf385b470dbb" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "73978a016ffd538911916f5874d8a178" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "41c6a9db31d458bce0eb4a5e026421c5" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "001803da1cc73e6abd87ef094d38370b" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "e22038b7aa45a7842ab5996cc07697ec" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "5d3f66d64778a0ddfb52909e9afa17ee" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "56ae7b30025cd7a63987b7419f12db03" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "16d1a2c28253f7ce0d0de44a737a2f54" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "088661b7e37f306f843c729e7eadf0b4" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "31104633a3b79e6ef47728b31ca98678" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33479680, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22364160 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30670848 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33439744 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33466368 } ], "md5sum": "16aaad862dc08bc139e5c818843fbe54" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "877eaf9c325b235223c06b7d51be1628" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "f5481c922951ca17b5975a95f3f7d37e" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "358f29e582e16756b6886fcf4340c511" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "c04ea47e658a6ce3fa3e518aa597e064" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "b86129f69f110c6fea0bffe2d88085ba" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "0f051d8a78ced43f65e2f581cc4265f4" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "583cdb4030c7b4b50453481faf708087" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "09401526d192c9d6bd1f1839a2476d1e" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "dafa600e92a1637ff42e4bc7e749b320" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "47e1dc30a5b16a737738c11c35f4608b" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "948cb5ce6ce7578acb496e9d6f462432" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "c89b42bc4b2eba12300d5a96fe6f89da" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "a459b54fddc886561c318b32e2c8d05a" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "e1db916d5cabfaeadd474b62c40a5250" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "7b7ace8282e782d86940be0695a3bb9b" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "20d7d29b1501310c01f9112c4e63c434" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "539116989124a87df7a9af16f1297f9a" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "3e5a8d9b459a042404ace12c9489bf7e" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "37b985b20c6896422f42c3c90664313b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "91cc382066e2f48b49c0dbba3355027c" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "577fde23b5e620d8faa9f2bd93bb3f2f" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 33240064, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 8306688 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30457856 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33226752 } ], "md5sum": "8001bfdf2cd800929c7d379a9d36e310" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "2f281d2bcff58fe7832d6eb68c989477" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "6744999a6c05c0d96689369f4d1faf81" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "9ac8d337364076831539f6d57724ba1f" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 29845504, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22377472 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 22390784 } ], "md5sum": "c2775fa6aae4809fae145bc56216d30e" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "3496b3f8f7475e7fd336be17079ccb8b" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "38fa3133b3517130157c0112823d6c66" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "cec0eea1b5cbb7412f7d26048db17d2b" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "142bbcf882e6549d9323e91c1427debf" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "91fffa02bfe60c189dbc93d8deb4df95" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "214dfbe8d3f016060b5838d0610eeb2c" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "e84e7adaaef3fd2cb202b51e73a9044b" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "23e4db63345f83248b1273a0d97ecb1c" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "1bb343b0fe4a4f1c792b7adf0bf2afe5" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "fdb02acd9ead2a81960d7a0aafe18208" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "10a82db88fa7d836890ebc3b77a2467d" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "b11b3e3ba474f8e15ff3d9cc1c4cc5cb" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "48047828536f23597d8e9aeeca1bd076" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "1d1439b685ff2b5ab33e5f1a121ab532" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "1b69a83d115b72e5e9c1f4fa0cd4472f" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "c4eac57c5dad42ccf3ba362012287dd3" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "548590401f60ead15380e30c12f55554" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "8b831590140e2cb78abbfde6b0e1db58" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "485faf0ae5aab5aae214a15f8d395b2a" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "836da33718e70baa52c9f9c74df91281" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "eacfe4f84f9b8eb93f5ea21c822172c4" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "f23c10e61a642f4981c8bfe2f1fc0536" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "55c1d28a00f80dd85fd5a1345e91a0dd" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 25159680, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 14922752 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 17691648 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 17704960 } ], "md5sum": "1b2f772bca04a5af2bc9dd07f4a3b26b" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "fd955e84891ee27873087f92ff4fafc0" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "a35b9d55c2aeeba7c5300a88714e7236" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "1269b8a9ac9888e88bec9ddd0f82223f" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "4ae6174e0247b0dc18ddbd6405f0d8ec" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "0e307114515a9ef93a9bb961a37bec27" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "5385a38cbb4879ce73fb9f92ffe8cf89" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "27c4df7120339b94842a41423914ce13" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "79e506ca802abeb976eaee8ac4861704" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "cfa341d36d2d3a4cfeb86445a1d287ca" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25998336 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 26011648 } ], "md5sum": "b759714684e1a4647a9751c7084adbdf" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "3889d7314a29f2f6f5befa0fc96678d5" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "41ae10dec746c79fafef99f31a92dbe6" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "e3f26f6fb0437cd8c3b8238f97af9d62" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "a81c46d180d21a764273ff0446575f7e" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 14909440 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14922752 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23229440 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 25998336 } ], "md5sum": "3f32df7648e599ad1f4ba06c28bed01e" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "b8edb3d2dd54827c50f2f8da4219ea9d" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33240064, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 8306688 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30457856 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33226752 } ], "md5sum": "2b1ad874cf50887ed59d2d84309861d7" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "671bdf4a2fe2f89ac38c1519a0e075e5" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "32c9093eb964f0b38a8e4a0f110ac9bd" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "766490baa26371522a31d1dd4ce64458" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "ea1427e768b78a28a2de23d9c22b1b32" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "c6a4f396a5405ed2d244b659d2cb2101" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "e881fb8e38d00f54442d2cd973bdfca7" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "84273b21dba02480137caef5b1ba35ce" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "356d5931e792942d854910c1d577885f" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "c0bcd07c15730c5778682cf29a5fdb16" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "8446b9e5c970df40b91c7523c0dfd06b" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "fc55106d9d0e470f87f7eea2e54350ce" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "ee036948fa014dc20a3cd8f8bc939f75" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "d0b3ed7f8ba4b25c4b83f825cf0a19fb" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "751394bf10b119223f5c00e806101b58" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "3d19e97279ece11ca6395e9b11dd8787" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "1883772132772106af3fb1e1cfc50b2c" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "165df237c540c06e7b98022e28b04cd7" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "bb6c5a0c0424a548ba160a3faac9b0fb" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "024f75c73bf843c0aab36400005cb5b4" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "ab12ef4ba9bdf6ceb828b83cd4041e70" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "b56b7e2be4219e43cf1e109c972a94f7" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "fbbb7c163a565f2111a7eb67e3d867ab" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "bc9d0ea6485de4bf33779dff37c962b0" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "37e1cfd6507c39fbe50aa498f76def17" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 33479680, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 14909440 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 23216128 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 25985024 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 25998336 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33466368 } ], "md5sum": "933950b3325903c02ca42fae6e56897d" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "8589790527c010b8436cb612bb4d841f" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "9c6047916f154dd019354c7c10b99440" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "2a26306dd869c32a4b9712d0cc177557" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "a8ef5c9291c46692abf2aed63cfd076d" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "cb65efa05ef33b012c84c34b8976ae8c" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "b63b2a781ce16cc40e98082c578a5e0f" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "b1de7ed7d5d65eff9d7dc2e06f514f30" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "36539960293835ca856248babf22bc9f" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "cfa2d3f0396e988a575527a296f74f19" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "f677450c8fd0355f66068bedeede5719" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "582700759b98bf4990babc4afb7d2110" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "5a2d73a601079eb06ba52b754dcf4bc9" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "1374007bcdb5a6c9665fe407edc18f48" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "3aaad772a053d6a3aed963d260ea6f0c" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "c9aa025fe7c8075e37826cab05974131" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "6eeee585a2429e1575416667fda77567" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "77b0363936de6d6353c8314070e172c0" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "158430354b13155f6f9a5af3c5ed1a3e" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "706d1142e07200b084c78ca238450ea7" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "37f8845cdebcc3414d70a204ac231f01" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "acc404fe5f412bafb453ac82dde2dd34" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "04b728a8fe693170cb951c8a66b195b4" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "0462388f77ff4350c0b1dc0a22f8cf36" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "f36b4af472ef9028288467607c6d1e21" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 33479680, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 8306688 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 8320000 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 15774720 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 30684160 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30697472 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33466368 } ], "md5sum": "dc29aedbf945fe050376a8528ee9162e" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "968c0cccd6a53257c4cb34925fc530ee" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "d9fbac3cc5520afb35264dc4603902e2" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "e994143cbabdc2cff9b9335f1a7ae050" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "c66464d5fe7ce783d8ac591a2ed27491" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "ed8ad5299a5fc6a7cfcc816f59ee06ad" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "bd409723c4001b53b0e806233beb6b77" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "9f60917cb474856df2b899e25402088d" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "b9fa92e493ee8c3e53a89f37fb38c584" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "bcd257226ae536162526a5418c4c91cb" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "f079e171a5ef08af8d023007b3fe9936" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "8a2253e8089bc0f7022d2b1a92e7add4" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "4bb4303038cba4d8baf92c93d1ee3c7b" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "4a9bdd8c76c28cd4f3adefac8708cd63" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "ac3e26117dfadf478f3f955e0f615637" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "9b191604ac8494d2e2334ae29c721964" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "11ab2c742c7444748c4e8039cba9eec6" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "bef4fae0ca08595a3c8ea43209fe66b1" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "87fac668f3e311fa255621da5ba4debb" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "ec5e730a87f83c2494da9d0c91127fd9" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 33479680, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22364160 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30670848 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33439744 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33466368 } ], "md5sum": "aaf5b16e514a2265ef482efe1afe5c62" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "7e1975f14b7c0159534ee5c41943526e" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "f1dffb6708321a61f058cf48eec142da" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "f9cfe6f4d42dd686464c8d25aa47691e" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "290c0a64d248d1131cc4e78d869a62bf" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "e837217b612dd974b4cbc02f695a7186" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "2d0d663b9e6717be46fe95c7846fefbc" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "f785179cf112bafe3ac3753f28f775d8" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "4cfcad5adbe53c937c9d71f491797a2a" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "9a3be049ca74b213f03226948c80abc3" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "929fde75cdd8e010dc9202d898249747" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "c0cf1ce10bf41cd4bb2b04b401ae0642" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "d09f592a5366dc71160a828c2980d8f7" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "c03844b7283d8ca81a9bf470ab8bb400" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 59637760, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 6656, 2240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 59637760, "byteOffset": 0 } ], "md5sum": "b5e7e488a69e8bc0fb5bb6f84b4033ef" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33466368, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 33453056 } ], "md5sum": "3de912d28b0868250eca42edaf427686" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 119275520, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 35840, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 119275520, "byteOffset": 0 } ], "md5sum": "5dd5dd77f3ea8b7b3c615a4f4a89507a" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "1e1c07709a920f74cbb6dbc2d8f5c5c3" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 22151168, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 0 } ], "md5sum": "f3bbcd8421beabf97ee529a4068b912a" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 66453504, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 19968, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66453504, "byteOffset": 0 } ], "md5sum": "f37e5d03039687ed7758543d00b0fa5c" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 33453056, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 6656, 560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7454720, "byteOffset": 0 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 35840, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14909440, "byteOffset": 7454720 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 6656 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13312, "byteOffset": 22364160 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 22377472 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30684160 } ], "md5sum": "e7d3fbd38c04ebc4f8bc32cc2900e559" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 33226752, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 19968, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 6656, 832 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22151168, "byteOffset": 8306688 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 6656, 208 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2768896, "byteOffset": 30457856 } ], "md5sum": "56da6f30805451203fc17c2948eb0820" } ] }