{ "metadata": { "ParamSize": 485, "ParamBytes": 21092663296.0, "BitsPerParam": 5.000635812792825 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "51c289afd9bb1224b7bcd4e0a2e62729" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fac0b594cf401f15e4136b8202972c6f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "01c0e66dd0e0da43434dcf6812f9b0e7" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 27705344, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384000, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384000 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 16400384 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27672576 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27688960 } ], "md5sum": "5ddffcf244c3147b65daa87b503af5f5" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d4c2e3b257caa36627d1ae752cd90db4" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "222c18f10d289eb8564687c03b1eead6" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b2509047cc2145aef50c4e9effbb9284" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b559daf0394320091d2f849c28d57f4f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8cc21e276d0dc8a910f2163da0718ff0" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "9150aac827b0799502534a00aa200ef0" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "90937e2fb8bb1612579af5e6e9afff6d" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ce1d631c44cb5e5b90a08366597896ca" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "9c014a17bc2c69a74954cb2b19777f21" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7c593465fee733873a8972554a0f4a23" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1a88ccc4baadf1a4fa6319e617914ef9" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "670509b9c97496ed016b0d0a5d252449" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39fabe2c60cee5a4d0aca64cc82685ab" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "93f54305b1211bf7cebaf89f4fe1eeeb" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "170148ad68bca3d11926ef5ed759518c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4058d608c801ccef2b2b21aac799bcf3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "acb9023c360beaccb07fa7c900a582ed" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "0ca7b4fd9d7ca72e024537c8a635842d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "27989a1dee7cc179b29cfae71acef416" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c55e78f0b50d16827534c4a0d02fe5a2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e9fb0e5e3a81a5001a673bb00573ae41" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "79978c3005c205040cad0a035794740c" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "27d6b988aa0ae5a10e120f5aecaea60c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fb9bf20c6563a640d2bd43e1e71b61bd" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6abe2d2c675178e57adc58c2cee85beb" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "d7869b0af78202ef138f8a8ca4731824" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "86cb33e044276bd29a6cb8a14eda2cd8" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8853959bbc6c5e40840e638be5a0a229" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "742fada0d490595dbd55f3474b38017e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39adad76a4b70299c7de8b5d587a58fe" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e5308eca49ef12cc04959e907c2c90f9" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "765a1a37129049f99a74e24fab726072" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "6ffec9aa4b64ea12346ce7ff8cefc502" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "bbce586412801428b53a715f0347d9b4" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "20447ad46bdb295611e41a21e4b6dab5" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d9d5d80f9030bc72421a1fdea0094cb9" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4607cd8068931a1cf786371702a77b46" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b8b9a4acf5b98a9640bb0b1a9370cc8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8db07262d8523745176138d0a9daca91" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32931840, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384000, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384000 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 16400384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27672576 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 27688960 } ], "md5sum": "da0f7f86165ea3fe362939c74b284b23" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a942d1f9f092672fffb82fc4d2d7154f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6171366bb13395fa365cae3aa23d0af1" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "45bf48e549d2307f0d9910485127b2c6" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8a60d426d986962eec02915ee9b29638" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9520f82d2ea0c0874e05e0613f679277" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "cbb34cdd3444dd24a4164d122e0cdcbe" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 24952832, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 4210688 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15482880 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15499264 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20742144 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24936448 } ], "md5sum": "480be28e2b96b974833b376ca9c2b3ed" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "e80c44d63023fabeb68fa6b5cccbc65f" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c3509e1f874adb8cb29c21c22f2b7d20" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "90661ff0aa11c8fc0fbd4e5b32221cc3" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b2fd0dfd56de5d877c23a85841363c87" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "6cff45883241c0aaba1b907d2d4dd7de" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "f06d1671307b4792b594b9564405c7ff" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "9fd76ca6a4619b30119dbc671111e44b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ad279169b0407bee409a98a99ce606bd" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "17ea57e605e6b34e550f8bec34a9e2ad" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a64dad17d7c68fba727bcd38831f1d9" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "78f4df090b1f6a8b95d12adb65b0f289" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "e13b970ea2eba03147e3d4421aedf0de" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ac1f29649aa3bf044383051eb8b518e8" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "301df633322aa27069273fb9e32b658d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "403d51c9b60939f9c32ed2132de3c121" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "46c0010a19d7600954946fdd166b3790" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "e87d341d25e2d16685c28c096367c853" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "99073cae51496f5f5667caf29be60fb8" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d13d342543642fbeacb4f6113d6371ac" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b8a8563f709ea96f5ef70c61f7907754" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a9c7846ea31ef612ebb0b539753a1b5f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "586030e623ba8da8b0acc9dfc904a98c" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fe2d1ebbba5538a94913495b8b9ea770" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a4520c0d263d1113bbfd66f6c26aab85" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f944ca5932bcf0948c554623c0762c8f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "05c08640a2887aad56a9875672ceb716" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30162944 } ], "md5sum": "b5d02ff4796c05598385578cb1761d14" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "3d2a1e50775b74297730f27209c6e9f4" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f5d4177748c4624bf88220b1dcb32895" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0eb2186235cbcade2beae2f840680e1c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d5b5a74da0c51e50d19444c72b9e3221" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "64a51dc0dc45ebc29f477b97eaff6bfa" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "2b66649025f482d70ec48a6361620cd9" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3339d50e963e2f3d10a34c9e1da53377" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c8942d99a3474348fae79d6d24889802" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "d4d528531839b78fecdb053808aef423" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e01d7b219f5bbdbd2d9c4346710a8b8a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2b7fc8c46585c4af1f2074db5206cddb" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d9f1a749fd4d5f64b17198dbe103ceb4" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "67c3be9dc9b54c16d72a3a9fe598dec7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8af97d582d717ac1da97554f18bd32e3" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be88631f5a513c73bb360d3aca3e5510" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8ff9be5b7dc7205336b959428d32cd3d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "69b095128caed3b629c5c28f5e278b74" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e6c9bf0f5f1aa8acc04270282537717b" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 } ], "md5sum": "7be6a0efaf6f3df8f338bae33d148862" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c010dc016a78d3b226cb19575074636" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a64b1da3868428d40aff11012b925ec5" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f4420f464ca22f6e8d550a865057e88e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a68c2f5c0e1d547f0683e9082f4c2f55" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "58e5e89449c36f45e32b5cdddefe5462" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2d7af53ab2686df0d6692b72f59b2da9" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "76e46dc43bc0b7e78b5c25723701fff3" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cfa66a77c8352ca107ba3e17e0f59192" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "7c974b606f9061878fd776a508e4c9f8" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "105a290d7866088bc54ce3a6fd7f5b6c" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "61488efc9fc8846a5b3103eec0f1b14a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2dd7800534b7e11a32007a362036cef1" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "408c06f3d3e6cc1feca770b5a1d2b71e" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e9782cda2d14ce2c493aade04c8d58e1" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "b124566679a20ad9961f1d44b52f7e80" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "831e42789d775a5fab48975380bfb810" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f2174d80ef52605d40c66520deb7405a" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2f3b31d25d5baabd24bf09743dc41354" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25022d4cb7f29cd400d9acc568dc1f90" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3a5cb5e0b5b1e5afa15ab04182ddbe91" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f6c4f2d2a8837c1e36f381d950098607" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "60a7d777aa6b4aeedba3c095b2992487" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "12c4726f5a18b4a254dac7a7b8835590" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0eff33136c8f371cce12c514d936eebf" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "82d1533796d6f5611c8bbe2a70de1fca" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "10dc8b9a048bf4835de857029f62447c" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fb06fb11dc641cf6a7feaf74fb437c17" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8cbb8a55a9ad6ce27df35e426a09ace9" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "953edc08d3d31d75b1c3dc0b4d58d171" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6a9be1ebcbbde3544c0492ee558755f7" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "2d180bde7b621c476d2f43b0c47160f8" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45d9ca1847beeaa611f5bdbb0a8b2421" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3e88259421b2fac642af46875cac75e0" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "eb32a3648b74ce4d8a2131b4c19b75d8" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f8147a1d60290675dccb926407c10bf1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fdc9bc3c32666eb7bbfff0b3a6afce1a" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f30a48636f5b254c7b765c214cbaac5a" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ec316084e13e44f57a7cdf42fcfadc90" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "e38b72f47dfc414d3858aad5173bd960" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "086ea592c4abe062288368197258cc94" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a8f911873c895e162b62b493973452a0" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ae7695ee1ab8ab305bcab0264d6933fa" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cae8965ed1c978a693539674eafce3d4" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "dc3f4044d69ff36728269a73a024f423" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "ffe1fb4ad95b9c9708dc72ae958e75ea" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "08890436effc1f7bf64541c86fe871af" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "19fa149495e7fab7887ec39f39a80e6a" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "8936b243cfd6ef39f893542b7874f8e5" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "191f40b5f46285ff5cc50b2b17ebdb14" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "6f1de1d80995ed698295062c227f96b3" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "14e7700710a33faba8200d7d8e0bd04e" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4a1de74f1fb499bd17aae0f09f8c4889" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d76f620738ed66a47a1413c8df82226a" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5032145dd39299426bbfc0c9bf8cb330" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6d38f94bfaf167a27ae89fdf508b3472" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3ace85d9374c5186b90cd10d3a072853" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "be879c68ab66c100a92569e2c945a5fb" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 } ], "md5sum": "dbfbe5959b7f3561f2ed0e3d1f1a9951" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "20aed52c1996c9f204938181b3dceee3" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "58ce3a34170fb93ab86ea3d2aa729dbe" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0262b29467caabd991c1bf0983ec6491" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "24c180907a301ebc962e72576287e694" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4d8164bd2ba44a198abb350ccb4204be" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bb01c7587518862995a32401a12a08a6" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "ac1f386e03f79b282f4fb5c168d03033" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4b84d1f342363b3649e8c064e09f243e" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "6ec0bdf4fc155f845b4d699b14c0af6d" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "988126b28ca872fe3aa3760c7dbad17b" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2f93fdbaf87dd05adf1b98ae9df949e1" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1d39ac33c1ee82ab03f7486a184dbb6c" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c33be460d3c492b4d58e5b45622062d6" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "09a9894c025905e44535dd5eb96b67c0" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "7d79dd0409ad7f6d7bcf016546d8241f" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "4743faa2e1528b03d43b927c787658f1" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ae81cd9a0217584b18125cf702440868" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2ad313d41a074078137068049c09c02c" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "726a681b652425537e23c04f74d7d803" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f5cf4a750dfe9210a7b35d79194d877f" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "316d32afcce008e1423770c761e4dc3f" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "915b0cc669b30652108e987b5578a360" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "af3232587448232c98b80017bdbe31e9" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "9ff5b348bf6214d0bbf598add0eba907" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fdddef8e6736cc85d197976cfc18e57a" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "9de93485ae651a3efe0b8d5537c5c6f0" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "1c8101007d1ca2303506167fa3fbfbb4" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f960367f9c7456fc87149e6dcc52e8f7" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "735269649640623a359515ded69a38fa" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1ba000ee946c8dc0a53dac1a59e1c2a8" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "56571ee9fce4672df5883fb2093b4975" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "4aa701af11ffb5689b78e5f9aa64b568" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "008a2de95ba2cd104e582775070d192e" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "45974b446b9d1fde9076c6bd7866f772" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a5e28ef3fbe3a3295adb229be2bd4c84" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e4d0257fffc384d664a5090fab885b1a" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "418a99b13d1259bdc2033f6f66fdb77a" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "acf0afc06ef8cb3989dee34a2f1d69a5" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "548ea58d131f6b6c468a537f625053c4" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1b2652efd4f1f8cc7ec3cf87dc9ea221" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d343c13ed18f417c9c547ec2f4de8565" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30162944 } ], "md5sum": "791c59e363c4461b0292926989f2a87f" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "46991ba4e5971e5659223e9f15482841" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "af1ab61408381d31679ae9b4d1a5673f" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b77832a4c63ba3feef59afccda0e065f" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b32b36a3963f8d435affbe3786d61467" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "99a22fd8fe240ca9472265d31815e47c" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "506f3c1dd398acab938ddfe5d83c07d6" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11288576 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 11304960 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22577152 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 22593536 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 27836416 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "ff2a4986dae7c9180ee33ada2876fddb" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "b239b77b6e66d3ce9ffc95a682e4db46" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "91da5dd56cf7fef88e8154761f256d74" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a1e45abb3551477b4e5b892a3e06b8fc" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1bbde9a6c3daed56707d59d26ff75287" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0308901564a349f966c78978645bbe4d" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "d1e90e3b3578872cf689284a35ef6a6a" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "8c028f2de4e7da9ae35527b6ff1edabd" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b487ddc055b977d27b9783f92ac4e9f3" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "a55545b805a73cbd7666f0f5679e7ad0" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "058f114b8620d5708517dc72762ddf5d" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "00a5ae011f6015555e9e715d3542743c" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a0f0347aa5c20a3406f709a48b6e7ebd" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5cef43a1b10c6188645d63ab3c328f34" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "434dc4407d7fd85865583c7b3c027084" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f0d5712929f954041ffd4b3d1db613f8" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "936b91e9da8f23031bce918efc6725df" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "bfe4e8b29580d52a810a46f255a145e1" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "59275eed62d2c2d448370b6b91561a21" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a8e43c3bf9c2a9f618597cc1d9978d62" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "94d9ab055e331c223b0764ce42e7de02" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5be577502cd869f3b1b9a75d011f80cd" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "16b9cafce4eb75963bfbf7c8871b1990" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "21dee41c213d0ad5d90f31ac1b350f00" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2a3b0d7cfadafc7334fbc9a90a0447e5" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62288327511dbc253eb866df9b84553c" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "7e2de06796d7290d80e93fa17caf523d" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5dbf676004b1e3209b7cd05e63182ec4" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fcebe85bf2921111a393b9b9ed403b38" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "165350fafd5978a6845d548a5891e713" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 31981568, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9437184 } ], "md5sum": "48f95b4c6bd4da70e265bb4a1c7a653d" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6ff9e7ce4d631b4ac72d523162e1f8c" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f1eb1309ae0ec7def775873f5fbed23d" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e566567ba6a607f5e7b91adcbd6d599f" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "c6b047fb09196ae846648071ccd00131" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1c71657126abf1bab0137b12b6fbd27c" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aadaf8594cb1ab9048488abdb1529074" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20758528 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32030720 } ], "md5sum": "741a481d703effec6badfea46a8fd13f" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7912ad7409bf8231d1d472b4e60c974a" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "58dc551ed9b14d1e7706b644a5bf62fb" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "bc174d96b59a75c517606cab00c239e2" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "d3a967e37015f6cb289a74051965a901" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "695ce59a551959c98d060e43ce36bb11" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3778821f059d548908bb7f2ae81807cc" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "63a719fd947f375236aebc9acd75b30a" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "1a7ef4c58d79fb6d0fa6ec460a69dce2" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "975adb112c7413e03baf09847112fc2e" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "06ed340f207725e96fa5b6098c8037fd" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dc6c82146d937a3739692d0ffe04af38" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "631249db9749b2e41a74a12745f04602" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8d9de59684af45c6a28a901f5d215f84" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "6a104822852202d9fbd65d48d6685984" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "089175bb1d798a78f77da08b2295e0cb" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1944673beeb061b2ce10d600fa357940" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 32030720, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 20742144 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32014336 } ], "md5sum": "600da115d162546c4b7fb944a7571075" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bc21cec0e4f6efe4cbc5f94ed7e1a81a" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4f3aaf5bb7eaf4dd6e948a56634a698e" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "568d0e69483dc1210a1644bbc3a8f91d" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1b23a892827b6ee9f72477a253add733" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bc7716b3dd366258105902624f1b1d68" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1cfa966eeb69c9ac0175e1861e443890" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2752 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "224dd9ca4c5ae414a232023b58b9c1c7" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 30195712, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 9453568 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20725760 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25985024 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30179328 } ], "md5sum": "fea8329d781bb013be13164eaea76f25" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "8f6a462902ea4af0c0d101fef55e0cb2" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f383135de5ab88aa5b26f71516e1f171" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c298100dfc9e0501f9cfbb0d59c8b3f" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9128d8cd8b6b3768dea3278469fcb0d5" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 44032, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "99f9d58e4a0c7265e1da5665cdc391bf" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 44032, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "62c1719f9076ce87c281835a8cf0b76a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1e415c963f9ba748c87a3713e98f5b4d" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "337e10a8530dc8e4314fde7fdc8215d7" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 30162944, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 688 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11272192 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11288576 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 16531456 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 20725760 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 25968640 } ], "md5sum": "6da908215a65ab1a6bd8ac50b0b7588a" } ] }