{ "metadata": { "ParamSize": 805, "ParamBytes": 31776318464.0, "BitsPerParam": 3.4877930573818188 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "167d7a9b7af56de22920e15c021733ba" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "e9321f4b3a03806c179878cf4fbe4670" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "367cce4449b115480651d96ea625817a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "b5f743dc05d728ff752d216de7147ee9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "707c508ebb5434986c6c8a0a44e76e91" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7b368b65d23096d193884ec7f5ca1ab2" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "280cea43c77fe8b0deb0fd4f67921297" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0677e17311f61af3c62f2569e652dc17" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8ecb7a37751d66f80e6ba1e70e6a41d5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7c4ec25848dfd3807ef6865b65259b45" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "568acd6985613f8de692ee1e773594e8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e4bffe8109760b617258054b97490aba" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "31903b7987bb48d71e59c60d1cf90bc6" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "522e2afd8bf8ef5a3d2811aefdcf23da" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 26910720, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 16384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11780096 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15978496 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19337216 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23535616 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26894336 } ], "md5sum": "77e61a21ca520d6ee0f66d9c368b9f8d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e20695557f6e532523dc4b0d7c9f1c00" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6b898bd5e1276ca4b13c03a34f60c1a4" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "940576863d0726dd7008343c2dc6616c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "05a7e439b9c0b0435f030500137032a8" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "91a767efcb34b6ad3cacbf309d152dc8" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2d9141d6284907247082c06581eb16ef" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "b87f11ce1a186450dc336402c7fb3eab" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ca63e01a0654271cd90876fbec515e9c" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "02d1d4b4751a06ad189fa65a5ffea28e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d371b6ea796fd805554f637be178190f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "4e2109c82094851dbf882f57a49f4024" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4e11293a996124f8e80dcc0417ad2e66" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "18258e03ebcc901f393c65fcffdbb9f8" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e5deb6dbfc92b796675896a08979f7bd" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "affcb808ef2058f0071291040f0cd3c3" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "efe12f17d9ac90e2534aa5c0a6ccbfae" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ed457f46507dbfdbee7106a61abc5137" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "96688c04acb67786c57147e1e6834d51" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a0ffbb0799b735d18bf2472dc26fe9b9" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "43de48ceeb4babe5723571cc4a5c4c66" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "995d2408c0209d9a52a61b225eea574c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "66879fe16480bc4c4c64b77592b9c688" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "35d2389b936717ba53516e37effad55b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "e4dfb26316a3bb5a59335464c0a04111" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "a5d71153c967ee06579574cf5dd44842" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5c0da72336cd70220a0a75e98b1cb9ea" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4ef636f58eb922d3a2b8bacde5413efe" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "fe7da0b50ed3d6480007f43b62a5ff0c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "f73ac212d075f130ec18ee8f7f6f5ebd" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "81a356e86a6a0500d8983f1b6d65f2b3" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4dc8140b751eab0141c9b9a1eb0c2dcd" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b0263a969bea1bc223fa0a8cd0c5752b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "73895dcc7f83f55dfbf944c67f397fc8" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3102f39afa4d38ad746a57094d746d2d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5499ae48149287bfd2e307f6f9d13f1e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "a9f99e8b5140fcbdea5e3d1d396c71ff" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a41cb72b3ae03a468ed5e26d20149dfd" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b66737e415becb62de23d09f05ca5d22" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0235eadd5eb98d31cc08d4bd694588db" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0a959c43094a621da9a130b6a32e563a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a843b3f16a6ebba9127514d9745147c6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5c7ecc05f17bab1d8966f7f3c9ef229a" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f8e2b636e96a6b26d8d45071b9069d28" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3cadbc6d4fd3789cc561ef2938f1d6b6" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "26eab59c2db4a98b5f5b6044b44ec654" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "84bbb9d24428ef901ab7cb1930b7c035" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "4aee7bd8ff570ea0541eeb1adbbef77e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "66397c19898ce97beb7ebeac832b0bf4" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "dc2227ed7d67a38dbacab3eb321cdef5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "3a697897630e2fbbc0777ab1ab6d9b27" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "02e7cf9bc747f3b5796272699254472a" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a0bbf2cd12ba213b9cdcfdbd6692e8e4" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b795eafa8cd3e4f1591ed00d1ad12fff" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "0209c12c993ff7ce26959a5fda60045e" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4ce443f86a2d174c40037ad35b3719bb" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "27ffcb6d7284cc59fca2270b458a4366" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d9b483e3acabc106c367bd090c4e7311" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9c4c2d0b0c7fff81c28b039d1c13ebd4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "03984c9bb9126412b963b4d3324a05a9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "67acaa5e5c60ff8b800e4ca790534e36" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "43e3e93e9506d8764726ee213efd95aa" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "5a1713176edd3e570b10d602c7461c2a" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "67f95cc6e53536136d2bd8950191bb3b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "fec92d61ed3ffa910e9587c4e0727c27" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "828a358bc79572087931bbf848660866" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d602f3d45aa9a33925334326eb3d30cb" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "25dc8844f8e94db830fba945040ed85b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "224383d6644728788ed4e106962a5355" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "fac45b07dc008a7a4e9c075c2be6c5f5" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "31c9c4a444c2e89fca28dbdef5ca58dc" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "5f9135006906dac1d55da4229e6960b0" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a7e7966698f4820db430480be384f44b" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "3e73f71723f9a48e58ed7fcda5c8bde7" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f911c10c1b33be286e514f98879b07a6" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2c74d59beb4631db32d0ee857ad58541" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "dae763dcc9153bae1726800d88f98409" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c8d6e4dcb4e6903e5739cb097bc1e1a4" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "0ad51ee9350ba7923f5062aadfef27f0" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b0dc80264b70cf5030a9efbf0786ec26" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "a4925c7bb26ea0ef2d5a93305dea00f7" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "53412b54a5958d3d48a53da327a4d17f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f4030f1054f78311e949edb143c50bab" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "cb27a1854b25d0c0a98cdf45efa3ecd1" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4041f732d883d61a6e28dafc82b5de90" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fa839e8042ef7952a13693c7ff0ed922" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e620506ff977bce0b2686c5a4f3ef907" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "94d576eadd73a3068c7911112e00762b" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1bf1c966ad4245d50391fc7bbd58d11c" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a218c109571370f42fc34a005eea0123" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "93d15bd907da7ce514e7f9d9f292976b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cdc473999ff40da17ad59affd3f90eb6" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "99d2be7c8689e8818ba8f0e06d196d31" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "165ed44149d22c9f2465e227b00506b7" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "38d1c7bb4839a65640b06efdd338d3e1" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "67a26d5603aba74e02b629a225e66e13" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "af197d7456a59b3f193e7cfd224a3edc" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bdbc63c67d5087d07b826e262f1ed0de" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "40c9bca3a2d1d886783cb09b9d74b13f" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1a05e0dfb4ebaacc3a1677539934c703" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ff50c9d625a719b8e60507330a849cf3" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f2b8a299c2c52aafbcb33261796e857c" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9d5531fb05aa08bc0d786e34df07a939" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "7f75e212ad547054aa5e7bcdf7d10552" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "56c9244bd895b6e3d830dd9037bcfdf2" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5b6dc6c4f3a737ac398904aeefd4dd10" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cad2178cb29af0ff72352f98119afc20" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "53376588da37e9efc6fc26bea813ee4e" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "1c02d9078d5f6d059ed33be2d922efe8" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f620ca727ae35d14c845ec0d6217d0eb" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8c7f98929c73dab36d7012450fc7d1e9" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1a976836c14cb9dc9a08aee3c604c772" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "514e89cb379a4e8f8007d8f2fb182e83" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3b8da83d076efb77f97b05d084a26cd4" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d6f4193121dac71431754de3e6c2f412" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "e462f3f98f6ce3d72bc45801fe80970b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d735049c9de316842434d96468e22e8e" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1abeca7f5bb3d202eaf29ae26f7092b2" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7b7356ce0d6f53f1ce5080d4f4eb98fa" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e1b3e9e9ed9ccb8cb4804c9cf30ad33f" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "65c77d460c48c8f5b0111ed40b0382af" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "01f10445dcf40cc96a545f693a9699ce" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9713ad533de0b17b0811172704ad397e" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f7de36044b9b7ce36cf52c0b87a27a73" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b6d123403fd3032765c4358924bda2e3" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "04d6b87e75ae96edba8ab9c74b1d6bd2" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "e9b68fac678bc0e6e3e1bca423f5b813" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2c3652d32baa0fa70cc30ba8a717a4e5" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6260942e5c3a4c58b8177fa5f543737f" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "df4593a8b4d66847415685aeea5df196" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c4b26c9028348cc952a8db39ba577a1b" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9b00540dc5dfa455428b4fd7f9887281" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6baf43f6660dcbdfd927b74a778b1f8a" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "c3771b630d9e120361754c667e73f926" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d5e49b611d8dd90032c14d8bad444e3f" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "072f600c83dd87691ba15637a16987f3" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "7e4072ea6b4374770b43a25915a827ef" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "dd8da534be61052866b3be533ad3fa09" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1096bcff6ddf13fb022ae57893e323d1" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a95182c11ad6d959d80cc69df890d882" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "750fe17d270159db413eba2594574271" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "8ebcdb07ab0864953d97ed7d26bbd92b" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c260e1e10f0bf7a1a6440c462509a397" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "3072dec48821158f5a44a161c83263e5" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6bfef55f35a71166c6a4c06ecb3fee36" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "87ba1ad4c1bdbcdafc3695f8eb35dcbb" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e75590b3c9263625d309aba7a8efdd80" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "31fa84128ffd0d173b5bd0d73c12b96c" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "75490b43eaa6abd9b8b4563f01f96fc3" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d3958978275ee7e67fb9a0da561a81f8" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "a1916ee08636c2da4a18dc62421a05b8" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "36d6d7deca545c1430c7bd736cb9d580" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6ace2c74cce6df5c2abed402949ddfb3" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "7fad98ef1fa0e939a64977d4c57a3488" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5143e69fa4c1c532f6f218fe19aeb215" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a73f8937d3ddf9cdfed6f49f5f6ea6d7" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "24efcd17a1378c1344877d8dac0eb9dd" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "1d41359467232e8e24b666e8d665da30" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "090a9ecf851c094b20fa9236a41632d6" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "04ce4ebc8e6120ccaf0e7552854d8974" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d19d0977af3c4a681673eb633c0047d9" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "74424afa70c29cafc8e7a865fdc2c4d9" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c7d8bc397612de310b4ed88760da9a4b" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9b51bb61a3f18f369dac7627f2306f7d" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d29a563b2e04092418c93ef3d389d897" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2a4db3e8e339e12a08f168e9ab7410e5" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "dceb99b04557c2f93441665fa412aa88" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9f2c6f58dc07725b8183ee5d3ef85a56" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "931786a7bb9742b184577d88b8e25061" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6b667ec0cb046ef92005427bf0e87df8" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fe0d95f97c12ae0a6aecb016103d63a2" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "26bf2cc42f00a33189aaca19ba69dc9f" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "133be4513fbf1b4b0e4c06fd8edc9143" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "1efda9072cace9cf085daa35f891ef17" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7e55426bfaa94c62746a4097983e6c61" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bb56929a06e19c2d131b86ea1491f4a3" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "413309456c3674fec2d2e928a7339d83" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "41fa6b836b228c6857cf3651ae09fbe3" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8e8a353b3279af34f5582e07d0ffa152" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9651dd7ade4fc4be04bb5ee4ebdd3394" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "a1a0bc200f3e046744bc215e2096c824" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f67dc4f9b7b69e6c0d7d49e05b93c667" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "e0293237ac7d5adf194b10e818ffe014" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "246184c722b52a63b8bc059c10ecccbf" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "13dffac87029c6775ebac18a195c325c" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "eeb9e3e631c6a0df8ae7b53e07451ca5" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "307783c4dbf9dff08003e17dd8ab160e" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "23386020ae0ca30a1cdf25202c8a3bdc" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3fbd97f50a2e80a222aa07675f64b842" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "62d18bf057596ff25e835f14f3eb9875" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0b8965e44e28ffc1321797dc1210b076" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c099b73da0b82588d5dcb21b5cc278bd" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "3287d25fa4eaf9eb3a6c2afe3ba54a4c" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0c52f14416bbcd9c385bf208beb40536" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "96ba279f2b5eeb0c5e27cf685fd0554e" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "30395ca474e35a0230f629b0a5de8f50" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "21be87652d14b58f9ca256356e045266" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ffc4803d18a7c02c12b48c2a68747179" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b5d744a2e9e24e65c1bc4a017be7d5ab" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "364e23efba4a4359e63ef4df58088409" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "112a49e419e7cb3b474b40e4ceffc9fb" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "967cf985d65f70ee168752fb7c5ed10e" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a886f3cca2aee32b2466227328e52884" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4bb488d86e161f7362a535a99231ba28" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 26910720, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26894336 } ], "md5sum": "3d4431ef0e2812ee9f85a6d3fd4f72f6" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "82b04b74cd94a10fbfe3e805075b296b" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c8bb50e3debfa041bd35769d6e6386fc" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "205cc818e9f2cfa27a5a059c163c7f12" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fcb4886d2faed3ae53bff82634287a1b" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9f90e39a55c142c1a26834addea8e585" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "0a2b2d3e7e00ee7cf6017c46a20be908" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "e44b8cf5d2d86d1e38b674cb1422b358" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d70fa0fc51c370f3b66e1e90ae88b72c" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2e390caec19bda3c41a0588dab2f3b1a" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "34dbb3366ee6f4f9a4e616412323a0b9" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a232895b056065df89708f660ca7eb6c" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d1853c33dc2de244586dfe8fec5057cd" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9d80a8dfc234a3ee8b39061f199a56b2" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "378e19ba4390382462c8508717d721bb" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "41c1da2fb3b08594e8f38de8c386a856" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "fdd7f0d1e78f8e247585a35f1953308a" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "dd7e5f56013d7017fd54aef283c1964b" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a90efa1dc93c042d1c524061f2b11b2e" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e7b15a8fd669f2b0f0604bc92829646a" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "111021fc197b6361004a3536e06bcf76" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a20bd82a696f4f908368c1167b7f0ab0" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5bc6ffdb43b8d88dc7ed2ab0f369dedf" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a44ab7b38bb5593b216c6ca663271e1d" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "f99e4e524c97995a793f704c722a50e1" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "337b85e0eda6bd79d6d80871aefb0156" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e1032e51eaeb6374db33368c724f996e" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "27a0518359f0e23091c9d4c39d47d2aa" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c55d58b8b568acd4eeeeed77233a73d5" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4a31c9ad18e9b06edd5f9f0ee0b008a1" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eb661117f8068611fe8d4d9474ee32f9" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "864d10717312494dd95cc4481efdc888" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "56e2d2b23aae86fd26868bd7ebe131b5" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a5726012e1f622c5c27beea36941639b" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "72623bac2b2d1853b685a44734e39531" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b3f77cf29a14122728f4f79909b978a0" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "99d611b1b6d4ffc5d37adffd61f74967" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2a8e32e8b9a1c13d611939a22e9ac6d6" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4122e864361bc7070e55cc173596a6cc" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a15642d0bb387e4218f5fb134fa08e84" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5df611f8fe76ea4752150ced560f3cfc" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "37a1872c9fb9076452dc28df6adb197d" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5b913c84c2d02344b3a0cef60220a8f3" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "418ec4724e682c540a745b0f31292122" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "69d6a2f214ca48eb07d28545078b163e" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "dd9445df4b0362cba30d76ba9bc3caf6" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "dde62871e184a48293dbe50488a50d74" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e0a14db14bb750a8d08a94f841fe16cb" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "99e4d3ca98ba68fe20dfa6d89d3a69fd" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9cb6222036cadcf44dca1a6e27533776" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "204c9d46384c20b3f0fd950b6b7e207f" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "cbaca1c48faac93b6c4ae3bd2d13d4fb" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b7e0265b608fbada3377d6371769c9bc" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2eecc52e6418582b8fbdaafe10d6ca92" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "384cdc8d17fd72fbaca68fffbfb00c1b" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "8c6c6fc533f6b6647353e2491ac5e2f8" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "82b95f3d47f9e4912164cf4d2a84d948" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "1f6ed72bce5972e6bd43974b936d32df" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "abf4053c2d43731ffe989458af7b84e9" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9f2341772fef8c7927e75790099e24f7" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4e60b7668cb2192bd4ee6ed7437ef3af" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e4e2aae29ca45ac16cf47576f798e1be" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "48d90ef125f738aadd8323eb22044dd4" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "73f8ae6756366770f959de32f6e05af7" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "eaeb795213ec722f1932eb275f56e24e" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "cfe61800881204bc1c668afbad39a5cf" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "31e0c233b0b00bb150ff8499d36d6b87" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "721dcb184e38db8007544d5ce4972fac" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "463990557c6599f84d6b261116120dab" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1cf363444055d2902da975d536371020" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "aa120c9b248911e2cdf7a64ab0ea6aea" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "16a14156860e391f0467f12327e238f9" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "dd35daa1b81db0eb59f4c65bbf658276" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "e4c5f95ad2701383915ba4b5debe3d1c" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "7fb05ae33a23dbe959faf94ba0729181" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "97c0dcbbdfb4b25ca32ba04a998bdedf" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2a773802c7964b23b61cd52c1a053615" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e732aad57642f56fde62113c3bdd5e59" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "ea47b8cfcf64c4f527041d2168018c09" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "009e9b5894c5c6625135293de848fc9b" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "0c60fb8a32f8b80388156bed84ea7578" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ffc5ef164e2745e0607cba78dd6ac08e" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4ef1bb28f020dbc92adab242b17971b6" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5d9c944750569525a9c8237d60354074" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "66ec3bafdf8482810a0cbcc559f1d175" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "945e6e634a6e7d61afb04c17168a40d9" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2aeb274bc4408e94efc81c12848f36d2" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8a3fa570bcb65a3d27c3f1d7853802d2" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "db5d9c87f75d9b995144940d3496e071" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "7262d706b138f55f290e0704f8fe1a7f" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "76369fb49a3f6574c441f2478f77146e" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c59a9fae455cbdf91381b9a3d705d7d7" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "346ab1d8f32be50516476f83a6d566ec" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d8bc38770451c90e41f3b5da34b48a77" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eb6703a1211505bcd6d6d5d731ef4223" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6358a1f2d70af7908d5200bac99f756d" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "8c5fb4f4830463da77eb464fb33aed0b" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "890bb8da90c8305b23663a9592d8cb4e" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e2dbb774beff377aba55b1464630b0b5" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bc24c80118a7384e1a05d6c299396fc3" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "df02798dbe3b68df547a25d2dea4ea5a" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0e4b8d23d6e3016addd27cfadb15b4ae" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e1644a38f4377b2133659057c26656ad" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "e58f78db8061117d4f943da8058f595e" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8c4a26941355be0774ad93d7cf2e43d1" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "512435047ef43ac6185a3c232fa825fe" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1903bf321465a3f3fe3dcf1e87eb53ce" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "85c735b79382f0d3c095e8924144b099" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "dcc24a2db8d5fbab64df08db38f6d6ff" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "87fa881f33c536c05ae0997264d12b59" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6bcbad11d1c589fb246304821764de0c" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "8c7bc7c113bd359e56ab1489eca4315c" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1cb6cd024cd0ee0f7b2782057c6e05ec" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b8a12b06c74a51d0b5e8c94cca636ab7" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "73673e599c5af53b99faa2be01beddbb" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5854e21316be164d5042a10ccad969f3" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1dabb52b3783ee944acbbc62549c1db6" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "343dbe4061269298057d26ab42b20d52" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "cf02c64fe7fc06573cd3992f48823b66" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1ef41340f537b5bdee6b0ca5fbbb5cd0" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "c8effcdf5f9bd9c748516dcf513ad5f9" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "81433b5aaa936f7c3df008e630e0a97f" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0b8ca52fc7369e462eb9182bf0a7807c" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "07670440e1139021ef417000c3cb7ada" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c8f69b27fcfafae9e2561c7e87b06f46" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "7030654e7c0fce24d48eb1d6e7977ba2" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c519f5aa272d7010b3af0c202fe59d94" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "1cc586e81617850f5cefeb8c842596da" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "441aa892ff6ff5a0006002086db77e50" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "dd4916180eec5a91ee70338d9cf4b4ec" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "78c87f1cfbb4cb62a529e29daac36759" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1e9f9fbd8b6fa2fa4ffc0c7803ed9b02" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "30a1037542d3c0247f7ab752ab05cb66" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0e2fbffe95233136ea3d978166d7c9e7" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1b9d336b2ce8a05dc5d51c287746cfe4" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c57da21fe1223ace8f564e77a16430bf" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "d2e87920bfc92d5306dab5a503c071e9" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "88c6deb7d76095aa900e51bc8e92728d" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "42e5d998cada889d99aec0137c7c0cdb" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "57a8edbe44224038b14c16a8d4232ebd" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "7762817c7fc469e4e87a5a6877553267" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c264d231c260ea9d01857049da2bd069" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "405208a8b62a6276b8f49b0c4022a432" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "eca36381ce342277736014bff63b42fd" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7669a16ec17357b2594f0e7571f397f4" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "38a4770e66d099c3b732998fb605461e" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1e145d23a3ea500bbb88870d0295a417" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "f707ba4d3e6de9063f77a16764cb515a" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "60f6d4baf354dc15af5e9d1126a53040" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7fcdbadaed1798416d9e06d545aa2eb8" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "a9e710e88891c6c10cf60eab90c43727" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a4088b8125a0a2678bd394ec384dcaea" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "5ed662a1fb2509355a37077e624102c2" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "32b48c1de505123bd44d72d8ea491ff2" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f7334b0662bd1f6274a0bb08bb4dfd27" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f874212147c859e4de8eaff13100a9c5" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "06d0dcb2cd121dc3bc732437c9e7b583" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "691002f3b73faba9cd1383bd0c0aabcc" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1a6bb88ff4e11cbdacf129b17a1dcab5" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "52434c067d0477d95008346623bd5fa6" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6799b4871a0719f52482c0ac334b32f6" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "d4f0c83d0d66ec47c4ce441b7e11af98" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "defc1746013d5ad6370379b021c70b4a" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "04c148f16f5ef2e7f735b1c1dbdf6e92" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "8646fad5200b90738c013de5a1462c7b" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d2f2fae4625f9c97d5ff058dfd393fa7" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e21808f1b03a49d410d92fc43e04a480" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5a3a73321cbe56e7d32b9bc50e490baa" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f1c283ca2db2f16e7d7e4dc9e6f20b97" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9964fa426c0b9bf3855c889653a370ae" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "22b1bd8146d711aaa45c4c67f3adc368" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "aa512f4d5f0a5dc084bbebd1314177fb" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "96b66505157e4f5eb3eae83ab01b6809" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7f4d8c3661d54c12375cea5e82719c13" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "93a77430091259c5e5c2d2700147c2ae" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "45bed7d319e2079b1cb6bc284e00056d" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "de8630a9a7bb5f67426d86bbfef17f57" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e0b9db5b6f78b8e3a6f33898305c502e" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e2f8bea6dfe5deb37f176828d73f86cf" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5ee766f28189128557638fa31f3c7660" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "0d6555331560bd6d5961ebdebfb2a24e" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6e0d034ea9812ade840df7d6c32d472e" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0dc2308c9580c8f569475dba5c3ace4a" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "bf6f3cd6449cebb8724d810a67f5bb10" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "7fb073d8ef65a14d27c2396391b00266" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "35b5d8437fdfc13abc2557742d651fab" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "0ce7c0212a696d1b9240b4c9c0d095b7" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ad0244056ba74064c00a4dd329861e74" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "469b77c65c70b748afedf7e7d5cd16bf" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "243f6fc9016298b5cc7265c9b1bd1b8e" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4fe1baaa4505c481cbc1c162ea634158" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "3e20a07aceb3e936fc71ab19839c74e5" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d26c4b59ab16b5bb7bafeef24364dab5" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b348967819cce122e1a564eddd874a10" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "442e764bff1337cc1ba148580128b363" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e112a40a3217efcab796c80c85a038c0" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "697cda91e48730ca119e5e48886dbc5e" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c7d9ad7bb1ef317339d6861d4aeb478d" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "d9df58105ac12d01d43a544def9b821f" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e35f2c964dc4d65d0ef842fb6eabc44b" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2d3d284def90311f38eb1fb0927b69a9" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2ecdd8d1095328fadb9d93db10b5ba17" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e51d08a1b3e91ea78d9edb12f3ef4342" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "8f9a2ec8dc45315abf905a9c91d10820" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "f42870934c3b0cd7c761401144b89e90" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ec6432fa3905810666e36b25a4419251" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "c4cac964836ad19afa75316c075a8f65" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "aa90471382e008472d96b2f228b6d868" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "952ef528cf7cdc3b3e0c0cdc1825d1a8" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "111531a0aa45a2f08af111520e623f4a" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1e8c94c77c5e84c6da07d9f37f1db212" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "71c07ac9ef1c1b7dc81ddbbd8edb613f" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "42beb051f20350c0147d1bc82f185650" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "cd19750610bdca2ca11bf90b2a87fbc3" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e1809cdfd7fa3499de330dba41a69cce" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "eeaf1d28cc77f240eaf75eb279833ec9" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f5963cdf3928fb8c5d0e1d6c7a5e31f1" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8a5d50a733e1649e8961d228689f62c4" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cbf3d3a73ec723cd9ac00d1f626b3935" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1ac0c87c33d3c90d02d4e61c1c6bc5d8" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4962b849e576bb88c3cfae59fd403da1" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "99f8f10fbbebc9bd40a3a2a6227bc5ce" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 23535616, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23519232 } ], "md5sum": "ffd2c8b3056dc8062297daf352305c0a" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1e215dc0fb22c8e01b0deb25814bae05" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4336195af27abc0284858b229adf6262" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7d3db9276b91bd6b2a9b873628a071a9" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8e6261ec272011be178869c5649b9e2f" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "36cefeb759b27f6a13544e1b8f44e63f" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "14ebf5cbc9415a4333ae96ffae43ff8e" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d38dc1b83c1d08ac9492adf4f9d4237f" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a666053bd43d4cfeae007f7ad201e885" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 11763712 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15138816 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26886144 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26902528 } ], "md5sum": "61b954a9705bb188007d20ac7d6efe50" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a2f129bdba42e1b88fa255377a4e4cdf" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "63295d90d57458c7bfd5adbff3bf0ef9" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e3a4f300f4eb2aded96e82799dd79bbe" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "106f42c8ee2141ed4374ea2ded38bd6c" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2720b4074ed368e43c198d9b7cab8fe3" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "ca160894ac930a68f07df9f312658c7b" } ] }