Qwen2.5-7B-Instruct-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
f35b1f2 verified
raw
history blame
100 kB
{
"metadata": {
"ParamSize": 199,
"ParamBytes": 15231233024.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1089994752,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1089994752,
"byteOffset": 0
}
],
"md5sum": "cdca534f95b0f8cade80b96e842e55c8"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "6d2ffdda94b495a654f93b3a04df06f0"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "5c1acaba71654507dbc123c99c947404"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "9056169ec2bb0c842277d7c6ed17a4d2"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "b77610894f554d3e7ea41c0e7705e739"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "0ae032cc345cdda5ce13f74f4cfba526"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "15b47939b585856133a0bb40f1d5eb59"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "9b9f3153d6eaf8ea668f3830ab7f8790"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "aa7bea7e9997f5fe96725be8b98a2fbf"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "2f395418bdb0190381f2d7ffd4d9fb88"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "2f5f7550fd2375ba00bdefd5ed39de53"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "148b1e6e19a9cb04878a323114cce229"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "d89aae53b00c82f5452b7aea97048ed2"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "e7a58fc1091af799e8ec6eaa8a2a69d8"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "ab6bccc40154db925f1f8df29b454a96"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "c8fb1b5b262e2ad72b4bb31bcf8f9e15"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "ca53275030995352c1c60a7fcb775d1a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "8b54babd972f153608c5211065c193ed"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "ea63930e7207f32a8a4c77e9a0cc2a45"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "0bd6fca94b56bdc6eb4db96c161218e6"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "f115f1db5cd60babf240706590c4175a"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 1089994752,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1089994752,
"byteOffset": 0
}
],
"md5sum": "24daca6774c540da30a49e926dd5f0eb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "634d9deb2dfa1b4aecb159bf4d66c427"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "a95f91ee8b213d02bbe3876bb4caaabd"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "9a76126f34c09ec6f3914c7f563e9453"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "1d60b267b13cc7b2ff258c7f5e89cd29"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "27415e635043d1f9e86ada121b3d5250"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "8f09e1e4777caa919d183cf8dda95252"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "ad1b3499a169a1df3740c12d4116f40d"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "7a3797c0423ea80154f771f226d11470"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "8ef169df019f6750290b642d75b4babf"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "99549e01261746d416ceee43ee16a535"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "c2bcd1081f32dc1836bdc84242eb7621"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "71cca3651f3ffdd9894976a10b762b63"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "458406c8ce8e5f642e40c27d2a517be3"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "32ca49690d14f4f4cb41a56131cb0ea9"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "ea8c51f9880d1edd706fd9fa7681c830"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "f0e25085ff295eb8120ec67d4ce2d5bc"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "130a141311bfb8a11a8414fba5271522"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "5c80ead475dc17f82753192a048f6a80"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "b56c6d7a1ef84967fd9d81ac54e97a28"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "00c8f7668baad7bb670bf3bcda7fd04b"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "3c076aad831d68f3ba736f082df892f2"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "37cfcd8722caafcf081a1bfc4a79ede5"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "6643007e6dda68b7f9e1684c68726ba2"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "4b334c87ddc56bd8d9e1b864b9a700cd"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "f9ca9318c5680031e5dc99a39b9e206f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "dbc9563e8d536145609439599af9b180"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "ccaaa5924181d2868874dab5d017ad59"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "b7ed561904f64f0c6f54a86cbccbeaa3"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "71fd7bc08fae28bf75a63079ea6ca140"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "840e84a3b85c455e31c8587c00f0141d"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "2837c008ae7e11f3df62cb0df109cc1e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "1f4722d3341171b3b9349fa66001fff6"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "343a1d7282bd2dbc6347a3ef70d880be"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "fadfe66b4bef29bdfac9c84f6c331905"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "9ea1daf3983963dbfd00fdb06ef9e84f"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "4f737911fa360182e95bcfefad4b1909"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "8ec296f7f1cfc7e98f6d0c374536b62a"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "0b2f7d561a26d7563401ccbf155b4930"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "b83862070a4b0bc7ab7a561bed14e6aa"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "8069795ed8f9cd3b066f2290f49738fb"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "79df1f193e277c72a8aa948b74b610b4"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "dcd2cd27cbf82d7117ce191b07d0e12e"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "f374213c51b324c0535bde8db5a59131"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "0af6642f9485a6f1b2022d23cd653656"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "c7fdc26d87ef8bed8422d97cea549062"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "0ba359172b430cd0290da37cdf1bb4bb"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "93cda2d2c78173573f727fa4347441b3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "bf8dbcce7b1793d9adfca8393936cfbd"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "9d3ae672daef6ef7c6a89ef3d6903b69"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "55bc17a1ead1856a727805ee107d8e02"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "c08ac284bed45de0e6ec9865d84e6bb5"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "3d23fdae957c1d11ccd652090fa8a2fc"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "126d568dad1627b91fe2f26962a1b117"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "09cd2b7eb8cac4dd9685b20622171858"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "173163b95581c2f8834aaabe50f36029"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "54951bcad8a60fac7cae1bf5605b6b5d"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "c585fef52caeb9dec06b149547670ff8"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "891773728b4a74aaf57ddf33f0edee9f"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "ad5a1c85de04975069fec35ba61f8b29"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "3ac68d2d670bcb6b3c7017a903a3381e"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "4a293bffd7072f077c26e503deb5fb0b"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "f326da2bf1dc6a25b2bde1e8ce3fa1a0"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "a91825bf8452b481424c5658d22be82f"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "39e70c16b45bb29a56d790f50ef0ff8a"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "48093844abd11d3944f2616f1a5cdccf"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "83d12e76a741a24b0d568b845db83630"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 33546240,
"records": [
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7168
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14336
},
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 23552
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33053696
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33060864
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33068032
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33077248
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33084416
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33091584
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33100800
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33107968
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33115136
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33124352
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33131520
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33138688
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33147904
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33155072
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33162240
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33169408
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33178624
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33185792
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33192960
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33202176
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33209344
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33216512
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33225728
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33232896
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33240064
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33249280
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33256448
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33263616
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33272832
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33280000
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33287168
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33296384
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33303552
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33310720
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33319936
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33327104
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33334272
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33343488
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33350656
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33357824
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33367040
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33374208
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33381376
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33390592
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33397760
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33404928
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33414144
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33421312
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33428480
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33437696
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33444864
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33452032
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33461248
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33468416
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33475584
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33484800
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33491968
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33499136
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33508352
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33515520
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33522688
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33531904
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33539072
}
],
"md5sum": "cf59ecb174188d26f8c41e7092ccc0ca"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "20bcdab82238e9f2411190f3ef306732"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "f18a191950aec889bd7dcd69e0f10179"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "cae677313687816876269f1ec6638219"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "a7b5979272c4ce649561833f25a6a5bd"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "4cf4ee73185f999e92bacb0202d067e0"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "f22a4d9867954ebe421238733c7fab3c"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "5fc7527d1e798c988048402eee7453b8"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "1df7d9af89d9d92a1602e82e20634a47"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "611fee9523365daa7bd74448f460f030"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "f37f9bae05d4d5f388baa70020f85606"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "8620c986cafec41c6ef8f856839da7cf"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "d94d6b2cecfd309a3489f2f28bb967a0"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "75a849d263d279b21150479aa8b61256"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "19ca5eeb35bc26bf0d39de76f439bd87"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "cb9659ddcb42461c1348da4776d9b57e"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "c943a084592ee2d19ac1f5a4e9d59038"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e7e7dd75bea85fab11235674603f3a3a"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "aef4d0b4d7a364ac51fc755dd05ae04e"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "d35a8a381c8517ef72a42849602c6519"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "de71f9e8cb58dbc1f6970e0228232513"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "7e248b601d632decf76fd3f076e3888a"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "ef385194f21f1e40b54926c676b4573f"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "7baf47f3e75445d87299f742e98ce3ff"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "0ae90b849f43202289e6e039c4e8690a"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 33180672,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 9216
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33039360
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33046528
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33053696
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33062912
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33070080
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33077248
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33086464
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33093632
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33100800
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33110016
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33117184
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33124352
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33133568
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33140736
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33147904
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33157120
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33164288
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33171456
}
],
"md5sum": "b4b876946fc566f4217db081e3a96448"
}
]
}