diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ef76a52bd00b17e2339c0cbb65077529c893b51 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "merged_marcoroni70b", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.36.2", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5786877b25c8295c74590b8a5aace96539c44dd2 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.36.2", + "use_cache": false +} diff --git a/model-00001-of-00061.safetensors b/model-00001-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74dab1185fd39cdf6c07840a348c91b25fe6e55b --- /dev/null +++ b/model-00001-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c921454a21d2c14b80ee950a05f42ecc36f2b44a382dcaa58f627911d4cfc78 +size 4806739440 diff --git a/model-00002-of-00061.safetensors b/model-00002-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a213694aa638897853eb823a91717d196b543f2 --- /dev/null +++ b/model-00002-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6a497911d6e54cf79fe48a19d88a1874e123e1382f8b0825f80f86d94bf67a +size 4630578440 diff --git a/model-00003-of-00061.safetensors b/model-00003-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..768612ffaf8d5f9f021a554c0ff7225245a6e923 --- /dev/null +++ b/model-00003-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb7cdf4b22b39aea4aec8a4545d137b271ff58ed20f0e416cdf9f516b1f5638 +size 4362142864 diff --git a/model-00004-of-00061.safetensors b/model-00004-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b0497a8953a6b3efbfe2a5d06a805a6b2b12ff7 --- /dev/null +++ b/model-00004-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acaaeb105328a6cd1beb6dbbec05a8e206faaa507afa92186bf0dfe620dca9a +size 4966188864 diff --git a/model-00005-of-00061.safetensors b/model-00005-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..149a8ea5ce058f0a4ba48efbf2b5bc6a85cd199c --- /dev/null +++ b/model-00005-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a07113797690bcd5b09ebd0e5091415734730e1073a519d0ad868cf32fb018 +size 4362142864 diff --git a/model-00006-of-00061.safetensors b/model-00006-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73f757ee9bb53a91c911648c495feb6f300c7b06 --- /dev/null +++ b/model-00006-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f685a6bec89b561abeed2d2d50a048f6360e18d41da6096c76c921c01d0a7726 +size 4362142864 diff --git a/model-00007-of-00061.safetensors b/model-00007-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d2b77917fce4222bf85a12176288567b29d59a6 --- /dev/null +++ b/model-00007-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7879bf679306240982de17b19d7d1b5c8a61f502a6755814cf838e2344f7076 +size 4966188864 diff --git a/model-00008-of-00061.safetensors b/model-00008-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1058970779207ccfce8e97b176ef42e05e382766 --- /dev/null +++ b/model-00008-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc9d683a48d957473b7190e2fc19438cd42114e815afa74ff11d5761cc31a44 +size 4362142880 diff --git a/model-00009-of-00061.safetensors b/model-00009-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..814a96978037423f62a42a55e7cd5f0e61f90b3b --- /dev/null +++ b/model-00009-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e65c0be11c75254b5a72851119a5c378c6cbde9f775c6395b592fef1ec5b933 +size 4362142872 diff --git a/model-00010-of-00061.safetensors b/model-00010-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b73a1f387bda44db35d8e9e558e96431c2ca2b0e --- /dev/null +++ b/model-00010-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833b9629fd2797930dab0ea203cddbbb5fa1dc146e402fbaf53b4b342dd5b9c9 +size 4966188880 diff --git a/model-00011-of-00061.safetensors b/model-00011-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33db87ffa05d9d6d2bddf35b5006c67ab8d56684 --- /dev/null +++ b/model-00011-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8d428c5af18e27350d237b9042370fdf2503ec7acc6494e3b44a7c14da622e +size 4362142872 diff --git a/model-00012-of-00061.safetensors b/model-00012-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..174578ce0bccebb32a4941f636685eb6a180770c --- /dev/null +++ b/model-00012-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af8795d69ed71e1a06d2dbbfd5b48439a46141bb90b795553cc758a0c562ea4 +size 4362142872 diff --git a/model-00013-of-00061.safetensors b/model-00013-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f802e609b42695ce9d690e41bccc938fbeb8f94 --- /dev/null +++ b/model-00013-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ee7401f22503919607e109185c270e34d2f3ca723eb09e263a0f9bd94e66c94 +size 4966188880 diff --git a/model-00014-of-00061.safetensors b/model-00014-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98c3d2b6a41aadae68c1958f6293c3c5dd7c200f --- /dev/null +++ b/model-00014-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8603add4dd9382107bb77c6bd04b365e32a3ce2a6ef154d0c7a06a8131fce16 +size 4362142872 diff --git a/model-00015-of-00061.safetensors b/model-00015-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9948cfffdd3cacbac6187398760e468cf3856c8 --- /dev/null +++ b/model-00015-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007854ada89f3f97a2d7281c8903dbb471751391dfb0f99bc2f441609f0c8309 +size 4362142872 diff --git a/model-00016-of-00061.safetensors b/model-00016-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..174ca0a3dca0cde0072b35be0f912d9cdad7b7d3 --- /dev/null +++ b/model-00016-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5359e3fbe06ac2c5f4226367b9862bbe9ec52f023a2b3903abc47f7ebf94a6 +size 4966188880 diff --git a/model-00017-of-00061.safetensors b/model-00017-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e00959628afd66824e55fcbffd7e4e70f2a0037e --- /dev/null +++ b/model-00017-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c1ff4222fc0bfc7384d9d786402aeb78de40f4233691be91a702aadcb2d7d0 +size 4362142872 diff --git a/model-00018-of-00061.safetensors b/model-00018-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..189cdb219dd9b00dc9125a31f39203fc92b79c50 --- /dev/null +++ b/model-00018-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37be7fc13e5be411457381725b07effdff88ed3eb8cfa2ddfd73b24a818df57 +size 4362142872 diff --git a/model-00019-of-00061.safetensors b/model-00019-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e369eb5a19db83932465405e88c4a41031b3c8e5 --- /dev/null +++ b/model-00019-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699f5e55ab8af815a8077bdfcd799cbde6d32cd2a8eed998aae0dedf878b7e41 +size 4966188880 diff --git a/model-00020-of-00061.safetensors b/model-00020-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a95473cfc0f493dddc9cd60292b9b9a1643f1ab7 --- /dev/null +++ b/model-00020-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfed75586618d1423e0d85b030ef09758ecf68ce3d287c43324dc281e2977e5 +size 4362142872 diff --git a/model-00021-of-00061.safetensors b/model-00021-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d15830ac94d044192b46d3dfe0860ca43d1c92bd --- /dev/null +++ b/model-00021-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3dc977d1e3642cf1d09c9359a8c09333a3960ef6ef1c96b47086472ceedbde5 +size 4362142872 diff --git a/model-00022-of-00061.safetensors b/model-00022-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59ee3a2f1e4721a40aabf0dc1acdf3ab60686875 --- /dev/null +++ b/model-00022-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48af4b56cb5f36ae09912a4aea1999522b80faf83eaf987e4a96e1cac763e01 +size 4966188880 diff --git a/model-00023-of-00061.safetensors b/model-00023-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e8527ef9d655a1cb21b7af2146ab5e2a7287863 --- /dev/null +++ b/model-00023-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84dd5e109fedaa08bd68da7ff92c3ae47a14f902c04d249a6ba79240bcab2374 +size 4362142872 diff --git a/model-00024-of-00061.safetensors b/model-00024-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d02a96e497a666f69d48aa33683eb7339517549 --- /dev/null +++ b/model-00024-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a71a3b20c05490d0ebfa4705bba954162ddca20d61fbdb434ee8ed03effe47 +size 4362142872 diff --git a/model-00025-of-00061.safetensors b/model-00025-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3be92aac727c1884498e267f99913e011c59de5a --- /dev/null +++ b/model-00025-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150f593b91b8e37d3f1534dc7fa08929dec9296f50e949977dcda9f9e4ebe8e5 +size 4966188880 diff --git a/model-00026-of-00061.safetensors b/model-00026-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcc49f45e29ade49b283759d3eb2438197def208 --- /dev/null +++ b/model-00026-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2deffdd7aa5fd025f13634d2185c4831bb7a1d2eb895fd446604b1def0b08484 +size 4362142872 diff --git a/model-00027-of-00061.safetensors b/model-00027-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e52d8b6ce3183466276f334c762744f0b02eaa0f --- /dev/null +++ b/model-00027-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ebb73f23107966e39d94d4ba043166d9e9e83320818b33d9f4ea0440eb72113 +size 4362142872 diff --git a/model-00028-of-00061.safetensors b/model-00028-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae4fc81df5e6adfa74b8a0f07a5f9c89472de8e1 --- /dev/null +++ b/model-00028-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3181d787fa6a9c47741d7c70375cfd4566ac5275deb85b80cc9680b2c6b4f54 +size 4966188880 diff --git a/model-00029-of-00061.safetensors b/model-00029-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43442ea6aa1a42413a66fbb98a1a57184226a386 --- /dev/null +++ b/model-00029-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30b9d4e0f70d6a7737d975dd6539dbdda47a270748c1c0b2cb843cf9fa98aba +size 4362142872 diff --git a/model-00030-of-00061.safetensors b/model-00030-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d44a2bbacd9568ba5fe3bc760fbaf4531a9dc2d3 --- /dev/null +++ b/model-00030-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b2d4a4ca167ef885d2e6347c018f45f0ab247ebf434a0f4c7590c683dba96c +size 4362142872 diff --git a/model-00031-of-00061.safetensors b/model-00031-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..501abe3e35bb4a33216d576e81b3da789202c15a --- /dev/null +++ b/model-00031-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634c36e9b3bf11f9a0705a2b218d769c5ecc457227aa0b1b69ec3ea9185e7721 +size 4966188880 diff --git a/model-00032-of-00061.safetensors b/model-00032-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e386fc3f4f53cbd7e4acb7287c3ea1412f87fbb9 --- /dev/null +++ b/model-00032-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d730cb66474233b5da031179301133298e5e2e40ec1a8b5c6583187a0042932d +size 4362142872 diff --git a/model-00033-of-00061.safetensors b/model-00033-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..518816f46e5b72b1490c8deb718cbd0da75c5103 --- /dev/null +++ b/model-00033-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb9a14b0ad587092fa350b2e2ba70ab689030e5de5136f9435f195cc219f78d +size 4362142872 diff --git a/model-00034-of-00061.safetensors b/model-00034-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..faca52072f3d4ae82f2921c2da141077df4892a6 --- /dev/null +++ b/model-00034-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99c2e241a386b946dfa9de6c2f2b80ec80f99733a586bda62b2d2ff83d0a977 +size 4966188880 diff --git a/model-00035-of-00061.safetensors b/model-00035-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bc89284a2f1547a5b97fd29e9bf0dffa1e60662 --- /dev/null +++ b/model-00035-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1487c296ecf3c1955d32c5b8d36c768830a14919958bd734f4f60993e787bfbe +size 4362142872 diff --git a/model-00036-of-00061.safetensors b/model-00036-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afd6747e4bf98d1ab7abcfa4ab06d387c32e6558 --- /dev/null +++ b/model-00036-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92b2d0eeb416f96052a7eb82a84065193d6b76b209a6b32767066c81da2fe08 +size 4362142872 diff --git a/model-00037-of-00061.safetensors b/model-00037-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1ed5b10f53d7ec8ed38855516e521a0556d965a --- /dev/null +++ b/model-00037-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6941e407b38a38999e9eb16b823760e75654704c3c9632cdb63954f91f6597 +size 4966188880 diff --git a/model-00038-of-00061.safetensors b/model-00038-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba0287f2fc4bfc670cbc08fa1d22132d58c4482b --- /dev/null +++ b/model-00038-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ba77213ae35e9e785ac16d74ee8eb5cbdbef1b03ba8f7f2fb6a8711ff58df3e +size 4362142872 diff --git a/model-00039-of-00061.safetensors b/model-00039-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bc7a0a85e2f82c326ceac250f8ee8cf80c06d20 --- /dev/null +++ b/model-00039-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50bfdbcbcc0253f1d755599bbc777a1e90c1995924d776299bbcf6b9005f3f40 +size 4362142872 diff --git a/model-00040-of-00061.safetensors b/model-00040-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd485c9aca91dbde7adf7bbb50582aa4b118e335 --- /dev/null +++ b/model-00040-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7548698a52f67879ed7333e01296f59d0924756faf718e13f7cc6af0e35672d4 +size 4966188880 diff --git a/model-00041-of-00061.safetensors b/model-00041-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b960b213033938225ea96f6f0cbc2df9ad1ebd56 --- /dev/null +++ b/model-00041-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c062571485cd736c62d949f2fabc4deecc80ed7192e3cd59b815331ea8ce13eb +size 4362142872 diff --git a/model-00042-of-00061.safetensors b/model-00042-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..257cb3e2cdd5ab54cc87f2998ee39986ca64922b --- /dev/null +++ b/model-00042-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e712a14133c9a2e1a7ed50edc4163712431df86d944a227b1acafde74da6cb +size 4362142872 diff --git a/model-00043-of-00061.safetensors b/model-00043-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40e49a31173d51a10c4142e05ee284e067cf78b4 --- /dev/null +++ b/model-00043-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6943e2137d3da0eed59ada7bf8dadac1c05016835d199d83ce6d3bac7ee6c3e2 +size 4966188880 diff --git a/model-00044-of-00061.safetensors b/model-00044-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e6b2b2791d4d4e4e964395985c714551d1c78e5 --- /dev/null +++ b/model-00044-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2207531330d874f2fd3f2d9971ef4f6847355ce5c655a5a5ea65b5df5dad8022 +size 4362142872 diff --git a/model-00045-of-00061.safetensors b/model-00045-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ddf78073bce3b19bc138efe4ed4254b055c6ccb --- /dev/null +++ b/model-00045-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7267b73fce2a4d4f63643c2ef281e91157aac7e4cde784eb875b431d8813ef9 +size 4362142872 diff --git a/model-00046-of-00061.safetensors b/model-00046-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eed2424a848bee410b22dd18f37e5850eec14e32 --- /dev/null +++ b/model-00046-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bddfc5288852364407679c384bf7fcfc6a81638ccd03b0c1fc63367cae1a9d +size 4966188880 diff --git a/model-00047-of-00061.safetensors b/model-00047-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8115dff5c74ffb68e54b5e93380ceab941548998 --- /dev/null +++ b/model-00047-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf30fb3f82c73ac064c826e888cde87fb6c07722ba139fa07a570bf4a037a2e +size 4362142872 diff --git a/model-00048-of-00061.safetensors b/model-00048-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5089b30fb6af87c4d22332db7a0946645e75b9d --- /dev/null +++ b/model-00048-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81da0ee7520aa5feec78297401a97a877a796c4a70c92e04b72c7cc5c7fbafe0 +size 4362142872 diff --git a/model-00049-of-00061.safetensors b/model-00049-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e17d24ce7500b97c369ead03c0d2a8236fdd8f19 --- /dev/null +++ b/model-00049-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900c43276048fcbe821233ab3a4ef74ed8a3e6e17e1e1efa525e12242fed27ca +size 4966188880 diff --git a/model-00050-of-00061.safetensors b/model-00050-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33806e453efff42dbceb0ea327c3eb7a2cc180ac --- /dev/null +++ b/model-00050-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060856b8b99bff8e1a0c5ff387a7ed0ba10ff1627c2c040a9c56a41693eb14fd +size 4362142872 diff --git a/model-00051-of-00061.safetensors b/model-00051-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..507da75409b85436ad14a524f90445698f1995d4 --- /dev/null +++ b/model-00051-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08361dfbc01cfa8fd866a879015c862e6b449bf1909b2f6cce765e7230ad8e6e +size 4362142872 diff --git a/model-00052-of-00061.safetensors b/model-00052-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35cad827b7246e23082e8c81604096037833d710 --- /dev/null +++ b/model-00052-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086c646e11008a5706a5c5fd68d59446f4840471aa6164a0e361eea798b5fcef +size 4966188880 diff --git a/model-00053-of-00061.safetensors b/model-00053-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5d1ca680dbbc15e6dde50a9d05d18b6cad8f588 --- /dev/null +++ b/model-00053-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3414013e1289de2b9374dad241af9be4f6d8bfef86b72f75e59c42c938d37a2 +size 4362142872 diff --git a/model-00054-of-00061.safetensors b/model-00054-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..389ebdfc074c3c83310a7929fb3886e667fb178b --- /dev/null +++ b/model-00054-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c97fa88e60d52b21163a28707f99370fde782309abe6205a9e470c3b73f0ff8 +size 4362142872 diff --git a/model-00055-of-00061.safetensors b/model-00055-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..444928e2034a6f40650ec6f77b5eac68e4d65c2a --- /dev/null +++ b/model-00055-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0142914344de9b6570fad0668355d4fe6e3c79b2037cb9895e7f5d5e53afc11 +size 4966188880 diff --git a/model-00056-of-00061.safetensors b/model-00056-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f41684160eb51012b4562239dce30b8e6d30e5e5 --- /dev/null +++ b/model-00056-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa7bd2ec9b4a36979c1cc2961d1e847029f7c8c6287f2f79a927dd901235d19 +size 4362142872 diff --git a/model-00057-of-00061.safetensors b/model-00057-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..021d6460394028f2031f249891ad4e6f4d0b490b --- /dev/null +++ b/model-00057-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e70031923ebcf09a2885c8d09687683b2a94b9b7dace3e034abb37cf37b3bbe +size 4362142872 diff --git a/model-00058-of-00061.safetensors b/model-00058-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f174e219c90fdc2eadecf345ed5f5e4ef2e02cfe --- /dev/null +++ b/model-00058-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abdf7bac2c7eed69152db58d610577f29f7fe68a7bdf1823d0f1fa92446ee35 +size 4966188880 diff --git a/model-00059-of-00061.safetensors b/model-00059-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2bf77eccbfb2b057d1757d97ac0b5778e01bf26 --- /dev/null +++ b/model-00059-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb203f71c99fcb2666cfb33133bada3e66410fb3b5240debd7fb4dbdf7380879 +size 4362142872 diff --git a/model-00060-of-00061.safetensors b/model-00060-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a4e55eabc80a64015ff577f09369adbdb9f55cc --- /dev/null +++ b/model-00060-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25cf6892509929633e2a0ced3e106ed262d05c77953a1cd6b0859b441ec2f87 +size 4362142872 diff --git a/model-00061-of-00061.safetensors b/model-00061-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4cbe28643e551de517d6a6020064726a95047a2 --- /dev/null +++ b/model-00061-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed40c0dea7f512d96865c6c6c110a0a82363608fadb84c0338bb97b8f628a9a6 +size 1988198960 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..4156aa9dfeed60097cda66ec60af33ad1607de57 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,730 @@ +{ + "metadata": { + "total_size": 275906592768 + }, + "weight_map": { + "lm_head.weight": "model-00061-of-00061.safetensors", + "model.embed_tokens.weight": "model-00001-of-00061.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00061.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00061.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00061.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.10.input_layernorm.weight": "model-00009-of-00061.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00061.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.11.input_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.12.input_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.input_layernorm.weight": "model-00011-of-00061.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00011-of-00061.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.14.input_layernorm.weight": "model-00012-of-00061.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00012-of-00061.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.15.input_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.16.input_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.input_layernorm.weight": "model-00014-of-00061.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00014-of-00061.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.18.input_layernorm.weight": "model-00015-of-00061.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00015-of-00061.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.19.input_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00061.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00061.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.20.input_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.input_layernorm.weight": "model-00017-of-00061.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00017-of-00061.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.22.input_layernorm.weight": "model-00018-of-00061.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00018-of-00061.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.23.input_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.24.input_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.input_layernorm.weight": "model-00020-of-00061.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00020-of-00061.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.26.input_layernorm.weight": "model-00021-of-00061.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00021-of-00061.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.27.input_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.28.input_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.input_layernorm.weight": "model-00023-of-00061.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00023-of-00061.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.30.input_layernorm.weight": "model-00024-of-00061.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00024-of-00061.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.31.input_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.32.input_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.input_layernorm.weight": "model-00026-of-00061.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00026-of-00061.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.34.input_layernorm.weight": "model-00027-of-00061.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00027-of-00061.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.35.input_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.36.input_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.input_layernorm.weight": "model-00029-of-00061.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00029-of-00061.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.38.input_layernorm.weight": "model-00030-of-00061.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00030-of-00061.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.39.input_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.4.input_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.40.input_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.input_layernorm.weight": "model-00032-of-00061.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00032-of-00061.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.42.input_layernorm.weight": "model-00033-of-00061.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00033-of-00061.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.43.input_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.44.input_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.input_layernorm.weight": "model-00035-of-00061.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00035-of-00061.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.46.input_layernorm.weight": "model-00036-of-00061.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00036-of-00061.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.47.input_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.48.input_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.input_layernorm.weight": "model-00038-of-00061.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00038-of-00061.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.5.input_layernorm.weight": "model-00005-of-00061.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00061.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.50.input_layernorm.weight": "model-00039-of-00061.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00039-of-00061.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.51.input_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.52.input_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.input_layernorm.weight": "model-00041-of-00061.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00041-of-00061.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.54.input_layernorm.weight": "model-00042-of-00061.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00042-of-00061.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.55.input_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.56.input_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.input_layernorm.weight": "model-00044-of-00061.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00044-of-00061.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.58.input_layernorm.weight": "model-00045-of-00061.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00045-of-00061.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.59.input_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.6.input_layernorm.weight": "model-00006-of-00061.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00061.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.60.input_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.input_layernorm.weight": "model-00047-of-00061.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00047-of-00061.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.62.input_layernorm.weight": "model-00048-of-00061.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00048-of-00061.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.63.input_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.64.input_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.input_layernorm.weight": "model-00050-of-00061.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00050-of-00061.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.66.input_layernorm.weight": "model-00051-of-00061.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00051-of-00061.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.67.input_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.68.input_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.input_layernorm.weight": "model-00053-of-00061.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00053-of-00061.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.70.input_layernorm.weight": "model-00054-of-00061.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00054-of-00061.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.71.input_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.72.input_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.input_layernorm.weight": "model-00056-of-00061.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00056-of-00061.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.74.input_layernorm.weight": "model-00057-of-00061.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00057-of-00061.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.75.input_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.76.input_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.input_layernorm.weight": "model-00059-of-00061.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00059-of-00061.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.78.input_layernorm.weight": "model-00060-of-00061.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00060-of-00061.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.79.input_layernorm.weight": "model-00061-of-00061.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00061-of-00061.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00061-of-00061.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.8.input_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.input_layernorm.weight": "model-00008-of-00061.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00008-of-00061.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00061.safetensors", + "model.norm.weight": "model-00061-of-00061.safetensors" + } +}