diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3025 @@ +{ + "metadata": { + "ParamSize": 199, + "ParamBytes": 15231233024.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1089994752, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1089994752, + "byteOffset": 0 + } + ], + "md5sum": "cdca534f95b0f8cade80b96e842e55c8" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "6d2ffdda94b495a654f93b3a04df06f0" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "5c1acaba71654507dbc123c99c947404" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "9056169ec2bb0c842277d7c6ed17a4d2" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "b77610894f554d3e7ea41c0e7705e739" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "0ae032cc345cdda5ce13f74f4cfba526" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "15b47939b585856133a0bb40f1d5eb59" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "9b9f3153d6eaf8ea668f3830ab7f8790" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "aa7bea7e9997f5fe96725be8b98a2fbf" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "2f395418bdb0190381f2d7ffd4d9fb88" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "2f5f7550fd2375ba00bdefd5ed39de53" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "148b1e6e19a9cb04878a323114cce229" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d89aae53b00c82f5452b7aea97048ed2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "e7a58fc1091af799e8ec6eaa8a2a69d8" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "ab6bccc40154db925f1f8df29b454a96" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "c8fb1b5b262e2ad72b4bb31bcf8f9e15" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ca53275030995352c1c60a7fcb775d1a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "8b54babd972f153608c5211065c193ed" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "ea63930e7207f32a8a4c77e9a0cc2a45" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "0bd6fca94b56bdc6eb4db96c161218e6" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f115f1db5cd60babf240706590c4175a" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 1089994752, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1089994752, + "byteOffset": 0 + } + ], + "md5sum": "24daca6774c540da30a49e926dd5f0eb" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "634d9deb2dfa1b4aecb159bf4d66c427" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "a95f91ee8b213d02bbe3876bb4caaabd" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "9a76126f34c09ec6f3914c7f563e9453" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "1d60b267b13cc7b2ff258c7f5e89cd29" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "27415e635043d1f9e86ada121b3d5250" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "8f09e1e4777caa919d183cf8dda95252" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "ad1b3499a169a1df3740c12d4116f40d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "7a3797c0423ea80154f771f226d11470" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "8ef169df019f6750290b642d75b4babf" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "99549e01261746d416ceee43ee16a535" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "c2bcd1081f32dc1836bdc84242eb7621" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "71cca3651f3ffdd9894976a10b762b63" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "458406c8ce8e5f642e40c27d2a517be3" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "32ca49690d14f4f4cb41a56131cb0ea9" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "ea8c51f9880d1edd706fd9fa7681c830" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f0e25085ff295eb8120ec67d4ce2d5bc" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "130a141311bfb8a11a8414fba5271522" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "5c80ead475dc17f82753192a048f6a80" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "b56c6d7a1ef84967fd9d81ac54e97a28" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "00c8f7668baad7bb670bf3bcda7fd04b" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "3c076aad831d68f3ba736f082df892f2" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "37cfcd8722caafcf081a1bfc4a79ede5" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "6643007e6dda68b7f9e1684c68726ba2" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "4b334c87ddc56bd8d9e1b864b9a700cd" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "f9ca9318c5680031e5dc99a39b9e206f" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "dbc9563e8d536145609439599af9b180" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "ccaaa5924181d2868874dab5d017ad59" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "b7ed561904f64f0c6f54a86cbccbeaa3" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "71fd7bc08fae28bf75a63079ea6ca140" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "840e84a3b85c455e31c8587c00f0141d" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "2837c008ae7e11f3df62cb0df109cc1e" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "1f4722d3341171b3b9349fa66001fff6" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "343a1d7282bd2dbc6347a3ef70d880be" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "fadfe66b4bef29bdfac9c84f6c331905" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "9ea1daf3983963dbfd00fdb06ef9e84f" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "4f737911fa360182e95bcfefad4b1909" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "8ec296f7f1cfc7e98f6d0c374536b62a" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "0b2f7d561a26d7563401ccbf155b4930" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "b83862070a4b0bc7ab7a561bed14e6aa" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "8069795ed8f9cd3b066f2290f49738fb" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "79df1f193e277c72a8aa948b74b610b4" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "dcd2cd27cbf82d7117ce191b07d0e12e" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "f374213c51b324c0535bde8db5a59131" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "0af6642f9485a6f1b2022d23cd653656" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c7fdc26d87ef8bed8422d97cea549062" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "0ba359172b430cd0290da37cdf1bb4bb" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "93cda2d2c78173573f727fa4347441b3" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "bf8dbcce7b1793d9adfca8393936cfbd" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "9d3ae672daef6ef7c6a89ef3d6903b69" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "55bc17a1ead1856a727805ee107d8e02" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c08ac284bed45de0e6ec9865d84e6bb5" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "3d23fdae957c1d11ccd652090fa8a2fc" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "126d568dad1627b91fe2f26962a1b117" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "09cd2b7eb8cac4dd9685b20622171858" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "173163b95581c2f8834aaabe50f36029" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "54951bcad8a60fac7cae1bf5605b6b5d" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "c585fef52caeb9dec06b149547670ff8" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "891773728b4a74aaf57ddf33f0edee9f" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ad5a1c85de04975069fec35ba61f8b29" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "3ac68d2d670bcb6b3c7017a903a3381e" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "4a293bffd7072f077c26e503deb5fb0b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "f326da2bf1dc6a25b2bde1e8ce3fa1a0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "a91825bf8452b481424c5658d22be82f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "39e70c16b45bb29a56d790f50ef0ff8a" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "48093844abd11d3944f2616f1a5cdccf" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "83d12e76a741a24b0d568b845db83630" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 33546240, + "records": [ + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7168 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 14336 + }, + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 23552 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33053696 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33060864 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33068032 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33077248 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33084416 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33091584 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33100800 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33107968 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33115136 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33124352 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33131520 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33138688 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33147904 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33155072 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33162240 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33169408 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33178624 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33185792 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33192960 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33202176 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33209344 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33216512 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33225728 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33232896 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33240064 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33249280 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33256448 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33263616 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33272832 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33280000 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33287168 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33296384 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33303552 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33310720 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33319936 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33327104 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33334272 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33343488 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33350656 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33357824 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33367040 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33374208 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33381376 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33390592 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33397760 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33404928 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33414144 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33421312 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33428480 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33437696 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33444864 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33452032 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33461248 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33468416 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33475584 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33484800 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33491968 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33499136 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33508352 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33515520 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33522688 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33531904 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33539072 + } + ], + "md5sum": "cf59ecb174188d26f8c41e7092ccc0ca" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "20bcdab82238e9f2411190f3ef306732" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "f18a191950aec889bd7dcd69e0f10179" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "cae677313687816876269f1ec6638219" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "a7b5979272c4ce649561833f25a6a5bd" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "4cf4ee73185f999e92bacb0202d067e0" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "f22a4d9867954ebe421238733c7fab3c" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "5fc7527d1e798c988048402eee7453b8" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "1df7d9af89d9d92a1602e82e20634a47" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "611fee9523365daa7bd74448f460f030" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "f37f9bae05d4d5f388baa70020f85606" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "8620c986cafec41c6ef8f856839da7cf" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "d94d6b2cecfd309a3489f2f28bb967a0" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "75a849d263d279b21150479aa8b61256" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "19ca5eeb35bc26bf0d39de76f439bd87" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "cb9659ddcb42461c1348da4776d9b57e" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "c943a084592ee2d19ac1f5a4e9d59038" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "e7e7dd75bea85fab11235674603f3a3a" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "aef4d0b4d7a364ac51fc755dd05ae04e" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "d35a8a381c8517ef72a42849602c6519" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "de71f9e8cb58dbc1f6970e0228232513" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "7e248b601d632decf76fd3f076e3888a" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "ef385194f21f1e40b54926c676b4573f" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "7baf47f3e75445d87299f742e98ce3ff" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "0ae90b849f43202289e6e039c4e8690a" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 33180672, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 9216 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33039360 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33046528 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33053696 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33062912 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33070080 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33077248 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33086464 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33093632 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33100800 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33110016 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33117184 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33124352 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33133568 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33140736 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33147904 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33157120 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33164288 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33171456 + } + ], + "md5sum": "b4b876946fc566f4217db081e3a96448" + } + ] +} \ No newline at end of file