diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/21edfc81710e2dc1e41bd42b3a51bfff845568cf.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/21edfc81710e2dc1e41bd42b3a51bfff845568cf.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/508754bfb1263631e39be7a2cd3577f6b5657c16.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/508754bfb1263631e39be7a2cd3577f6b5657c16.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/603a03079fc818533152bed91278fd340c52b82c.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/603a03079fc818533152bed91278fd340c52b82c.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/8fec8bc88b5369637bd620dcbefe4d05769a696ade5476ba9e585a65ae4df440.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/8fec8bc88b5369637bd620dcbefe4d05769a696ade5476ba9e585a65ae4df440.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/aac9173f90ca8ae7a81f043ac396e55407275a58.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/aac9173f90ca8ae7a81f043ac396e55407275a58.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.locks/models--WizardLM--WizardLM-70B-V1.0/e41416ddd79948246ea2dced6800ea3cd531c424.lock b/.locks/models--WizardLM--WizardLM-70B-V1.0/e41416ddd79948246ea2dced6800ea3cd531c424.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..e41416ddd79948246ea2dced6800ea3cd531c424
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 32000
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..603a03079fc818533152bed91278fd340c52b82c
--- /dev/null
+++ b/config.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "/home/aiscuser/Llama-2-70b-chat-hf",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.31.0",
+  "use_cache": false,
+  "vocab_size": 32001
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4eeac192ec12cc918890a65787afc7a356055406
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,10 @@
+{
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "max_length": 4096,
+  "pad_token_id": 0,
+  "do_sample": true,
+  "temperature": 0.9,
+  "top_p": 0.6,
+  "transformers_version": "4.31.0"
+}
diff --git a/model-00001-of-00029.safetensors b/model-00001-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ebd7f5174718e90f353b140b1a1bf7d60a9c09b9
--- /dev/null
+++ b/model-00001-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b37c5384a3f08c9b7f595671defab670ecf06d8591f20c24838d313eab8c312d
+size 9437351736
diff --git a/model-00002-of-00029.safetensors b/model-00002-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e19a105434dcefe4844b41f32e14b02573bf7ba2
--- /dev/null
+++ b/model-00002-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:179582e4734adcba3ef66cb65332a7ae0d5482c1ebb0f9df30f9882699180035
+size 9328332824
diff --git a/model-00003-of-00029.safetensors b/model-00003-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..401e172855ae086b0b421e732a9851ba029d457f
--- /dev/null
+++ b/model-00003-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6c6af4256728a3ececd2a3f37677dfe40fb89864958588c8a4e6b9b700bc678
+size 9999421088
diff --git a/model-00004-of-00029.safetensors b/model-00004-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a301fff201836f7a908ec6ad52f153f80e65c305
--- /dev/null
+++ b/model-00004-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0290d4d16e8bbf6e1ff650abc3ceede70d246a3b6ab5f48005477e61d518f21a
+size 9932312376
diff --git a/model-00005-of-00029.safetensors b/model-00005-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6cbd7fc71a9400c69f095b1a6dcfb30c1027952
--- /dev/null
+++ b/model-00005-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57373446a8bfe5e5a018ec4fcb59ec44d70f304757f7f748870636c8b6007c8d
+size 9328267088
diff --git a/model-00006-of-00029.safetensors b/model-00006-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3c8f6052b914add3f2336eb9fc413f8993d0e0ed
--- /dev/null
+++ b/model-00006-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:626de087f6f80fe0e8ea696f4c972ed7537b1bc39c7fc589e6cb47d168613c06
+size 9328332848
diff --git a/model-00007-of-00029.safetensors b/model-00007-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..589aa5b16320e3e2f2726bc92fa4e3cd503378a8
--- /dev/null
+++ b/model-00007-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1eb392229b50ee79b331ca95841a9bd2d8b0744be9927838b82de3b33502a0e
+size 9328332856
diff --git a/model-00008-of-00029.safetensors b/model-00008-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ea764006fa0ff7b3f784a1e9b36c5e0ff09891b
--- /dev/null
+++ b/model-00008-of-00029.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85e679a9ee87efbc9072dbef2c09d2097bef624b94e25aba6e588c9cbda6e520
+size 9999421120
diff --git a/model-00009-of-00029.safetensors b/model-00009-of-00029.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f004b1c0de9a20f51027702a14cde70ea6771db2
--- /dev/null
+++
b/model-00009-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef967321593494900e31409f09994017d39fa50db140ecd39a27046e30c22f1 +size 9932312392 diff --git a/model-00010-of-00029.safetensors b/model-00010-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..415970255c372c337123e9fee1929b00eb1fb1db --- /dev/null +++ b/model-00010-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e2595e914edd2a674fd0d758b207d984e3903ebaad8e0fa22c12669edfa2b0 +size 9328267088 diff --git a/model-00011-of-00029.safetensors b/model-00011-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d692dc5d8415ef73e029b3803c16336aac3e253 --- /dev/null +++ b/model-00011-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1f4ccc00c139a562c9fcbb2c92c0be81e6b983514c0a403d176414bf053819 +size 9328332848 diff --git a/model-00012-of-00029.safetensors b/model-00012-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8353ea661bf9bb0f601c646dcc31e2319b4ce79 --- /dev/null +++ b/model-00012-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04581c72eaf85eb30eac3052289b30687ecfaf3f290c53b949b5ba1c8772f7bb +size 9328332856 diff --git a/model-00013-of-00029.safetensors b/model-00013-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7345e900071542b8fdce70b7f4de0ae0e033410e --- /dev/null +++ b/model-00013-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf6527dfacce3cf86750c8dc4f07376cfc1da0be837828d307206825b5e8abf +size 9999421120 diff --git a/model-00014-of-00029.safetensors b/model-00014-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04fd3dd0e625ea2d415461adcfd3c0dd0d89ed3a --- /dev/null +++ b/model-00014-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc4cfbc1f45b49511d022f19750856bdfbbbd037ed0d6af16b6fe5f9ec53088 +size 9932312392 diff --git a/model-00015-of-00029.safetensors b/model-00015-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..daf1d5f6a578a3ddeca53d4833566f8bcde0e298 --- /dev/null +++ b/model-00015-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987e302ba9fb7ded7a5673ed2e84793bc46a87fe9288a23af9f881bcc724be45 +size 9328267088 diff --git a/model-00016-of-00029.safetensors b/model-00016-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c384ed733f1ea423c297efb570bdb33e9f024d5f --- /dev/null +++ b/model-00016-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726e42f9be216387759a073472f8a654a85d271ea6a75497a8103a6051fe33d2 +size 9328332848 diff --git a/model-00017-of-00029.safetensors b/model-00017-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1414cd852aa879ed97b1b752265532f087c70178 --- /dev/null +++ b/model-00017-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a134c60101838b7b23caebf0f20a5e43c2987a55fbbc0724c77c796aab18e599 +size 9328332856 diff --git a/model-00018-of-00029.safetensors b/model-00018-of-00029.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..8c5b1fe878fc3480b9aacba18a6743f9576f1695 --- /dev/null +++ b/model-00018-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14d07a7030a345fceac7acfc72fa017a328693418292ac91203d2c077c7c9b1 +size 9999421120 diff --git a/model-00019-of-00029.safetensors b/model-00019-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4984b39ef733b9abf50879ba8cfde9465aa8b518 --- /dev/null +++ b/model-00019-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:576afcad41ca1d9eb1ac98d616e4a312a451ab3134add2d44af30f06ae93170a +size 9932312392 diff --git a/model-00020-of-00029.safetensors b/model-00020-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9032edc829b6dbb448eac1d8c468cad9345a9788 --- /dev/null +++ b/model-00020-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518234cbfb31a5f354226bbfdfd3703deb57539184f1d65e3d2e860a2c6fee6b +size 9328267088 diff --git a/model-00021-of-00029.safetensors b/model-00021-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..278f8b9a7dc624abf358dc45d4710d3db61a3418 --- /dev/null +++ b/model-00021-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5ea45c6731d3164c6b747983002f0f5d41de4847fcd59f95cd1e7e84e56465 +size 9328332848 diff --git a/model-00022-of-00029.safetensors b/model-00022-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc79295207c27bc4b518608d7694da319b9643e8 --- /dev/null +++ b/model-00022-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1ac2dee095572731f712d81d441ed123e0c39d1f50633ab668fc29bf91fb18 +size 9328332856 diff --git a/model-00023-of-00029.safetensors b/model-00023-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51fc07494ca439c5041c2911fd672e86230044bb --- /dev/null +++ b/model-00023-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8ba7d46f69198032f53884d15d5c2d56678df908514c4b9db235d1966e7ef9 +size 9999421120 diff --git a/model-00024-of-00029.safetensors b/model-00024-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97e42395ae8b24fe438b5f1b414d6806f7cdea7a --- /dev/null +++ b/model-00024-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ac5559235d76e1b691c5c8550c6e520f05a58baa5ecf3abcd4cce88f69f2cb +size 9932312392 diff --git a/model-00025-of-00029.safetensors b/model-00025-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad6b41db2110654c73800ffad8ac816bd6595d7e --- /dev/null +++ b/model-00025-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d9a3537df5a3526d3a7a3cf6f759631cca637661b52654b4b941dc8be53946 +size 9328267088 diff --git a/model-00026-of-00029.safetensors b/model-00026-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0633d9e0aef924863dbc1fd0c682b34ef6ca2aae --- /dev/null +++ b/model-00026-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb8204829047181d5e14d92c0b26d9f244f2be747dceec67a50526a4da3bd5d +size 9328332848 diff --git a/model-00027-of-00029.safetensors b/model-00027-of-00029.safetensors 
new file mode 100644 index 0000000000000000000000000000000000000000..e10f080262b37a10567988959b8511a628d1ca1c --- /dev/null +++ b/model-00027-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9233a70b0a6b55fb7dd8872f7bd78bb07279f81a72791b232625f696c8c5f4 +size 9328332856 diff --git a/model-00028-of-00029.safetensors b/model-00028-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fee156321eb13a4d86b2a0a03d32264b57daac35 --- /dev/null +++ b/model-00028-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e105cbe0ad2d716e343f05c4e8b1042a0f99b86e58d2749c89c7195d700902 +size 9999421120 diff --git a/model-00029-of-00029.safetensors b/model-00029-of-00029.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..471044493f8701e6a2bfec2200d29012f99814a7 --- /dev/null +++ b/model-00029-of-00029.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0c80d7177f4e130d0f7e906e877f4e98cbc8a93ae4dc3793cc89ff9f3ef087 +size 7558335112 diff --git a/pytorch_model-00001-of-00029.bin b/pytorch_model-00001-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3eb1824dec82f804f6407e57c431a93089936c8 --- /dev/null +++ b/pytorch_model-00001-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fec8bc88b5369637bd620dcbefe4d05769a696ade5476ba9e585a65ae4df440 +size 9437357897 diff --git a/pytorch_model-00002-of-00029.bin b/pytorch_model-00002-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..676cfdd43de8a9eeaca6470c4c5847ac40bcb02f --- /dev/null +++ b/pytorch_model-00002-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3525c7c66ea76c0634d95d1cac991899da468c650230ef0cadb972e22fc3312 +size 9328339409 diff --git a/pytorch_model-00003-of-00029.bin b/pytorch_model-00003-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..a46f340f5e7aa693c5d8da708b9b3d41b046c463 --- /dev/null +++ b/pytorch_model-00003-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fafe5157bd059cc02d431fd5310d5857749b9224bd416cbda7c1f5d612e59362 +size 9999427503 diff --git a/pytorch_model-00004-of-00029.bin b/pytorch_model-00004-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0b7f67138cf2c3bbf61002bcacde06ab6fc5731 --- /dev/null +++ b/pytorch_model-00004-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688fe47669050d7c37f33e0d1e7079b1c39618039c38722d40f5afa5aeb2089f +size 9932318513 diff --git a/pytorch_model-00005-of-00029.bin b/pytorch_model-00005-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..737bcab07af5725a7c4143fe511deccad93ae6e9 --- /dev/null +++ b/pytorch_model-00005-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215631080cf223ed3b10527433fe0a8cb58fd8232803c4e3a062662fbfc10674 +size 9328273153 diff --git a/pytorch_model-00006-of-00029.bin b/pytorch_model-00006-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..09c2823837f7cf79f2fe34ae5286a7862e048b36 --- /dev/null +++ b/pytorch_model-00006-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d6e4e8b0587e4ab5afa3f3e96f705fe564469ab8ae22041ec2e28075947e61 +size 9328339385 diff --git a/pytorch_model-00007-of-00029.bin 
b/pytorch_model-00007-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a4e8afd084d84aeb524818332b9d3c2bc1b3e40 --- /dev/null +++ b/pytorch_model-00007-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c1d019cf7aec2bbdb2e3cd0b0b5834ff1b7459ae341c9744d59042d4626add +size 9328339409 diff --git a/pytorch_model-00008-of-00029.bin b/pytorch_model-00008-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..749a2afa98eb8a8bf7d2210ef201837d0db29108 --- /dev/null +++ b/pytorch_model-00008-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46a5ab93d8ad1c9432dfba8176531f6701d756d163a59d6fadb75d903cd1cee +size 9999427567 diff --git a/pytorch_model-00009-of-00029.bin b/pytorch_model-00009-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..753d41431f94ec57b7069a47ef86d8626744e709 --- /dev/null +++ b/pytorch_model-00009-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c6bcccc3c8c39e80dce8cfeefa9a6646222da1c62bc59fda2875110a96525b +size 9932318513 diff --git a/pytorch_model-00010-of-00029.bin b/pytorch_model-00010-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..522aeeb705e2a700b32f05624b390751b5286a02 --- /dev/null +++ b/pytorch_model-00010-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72995fd3cb8fb23cb8a525ccffbbd73d24b7d980e70ae8efe6591918c2f9aa96 +size 9328273153 diff --git a/pytorch_model-00011-of-00029.bin b/pytorch_model-00011-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b06281280db2be23da0e19da8b88dcbe94b5e8c --- /dev/null +++ b/pytorch_model-00011-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3f0b21843582c6fbd8c39393e5651a6973b4cf159ea6aff9d262d74a9247f0 +size 9328339385 diff --git a/pytorch_model-00012-of-00029.bin b/pytorch_model-00012-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb1b709cb0435b7edc6db011c6bfcba7471bac1c --- /dev/null +++ b/pytorch_model-00012-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af896b836e6f42ae4333a0aa2d0624c767e8dfe9241a74c0d22641834246b22 +size 9328339409 diff --git a/pytorch_model-00013-of-00029.bin b/pytorch_model-00013-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6566bbb72628b8d3ba1caa22d329c0d50c0755b --- /dev/null +++ b/pytorch_model-00013-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a47c4a7f9b5414843837c5333d42ad6eddcdc64faea1b49c3bc68793be0a69c +size 9999427567 diff --git a/pytorch_model-00014-of-00029.bin b/pytorch_model-00014-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffb0c06c2ed5da7777e56dc87f72f197666bf832 --- /dev/null +++ b/pytorch_model-00014-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d410909af537b680437a1dc61776691a0c559f00278bfa001451a7aaab6a8ae0 +size 9932318513 diff --git a/pytorch_model-00015-of-00029.bin b/pytorch_model-00015-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..15a9df652d167ada1f2db5d7baa4e3ea7c2f75fc --- /dev/null +++ b/pytorch_model-00015-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd77680bc692413eb31d7af2efa8f733452132365301c89492469a80c4e6b78 +size 9328273153 diff --git 
a/pytorch_model-00016-of-00029.bin b/pytorch_model-00016-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cd98ffd91c757f9f7ee857e22b9fac7f225086a --- /dev/null +++ b/pytorch_model-00016-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3973fb2e6f28dd4d56f65aa33110c290c76c6344ede7b5e364df5f2b415ea2d4 +size 9328339385 diff --git a/pytorch_model-00017-of-00029.bin b/pytorch_model-00017-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a2a1de644372d93517745925b1ab982ee4fd07a --- /dev/null +++ b/pytorch_model-00017-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:358e993b5105817e322950447bf5a603d678c6e148f70581a5a23c5186727419 +size 9328339409 diff --git a/pytorch_model-00018-of-00029.bin b/pytorch_model-00018-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..768e3696c25efd863ba515180a38a198468f5cee --- /dev/null +++ b/pytorch_model-00018-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e881d63c917a6c234b34be1cd88ace8fcfdc289897f1ec49f99510226fae5e +size 9999427567 diff --git a/pytorch_model-00019-of-00029.bin b/pytorch_model-00019-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d9e0aa8e1127d3686db28ce8939c204109fdee9 --- /dev/null +++ b/pytorch_model-00019-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94feee994f7d854fbfc516df4d044754132bff9e426716b7bb70c20c8a389368 +size 9932318513 diff --git a/pytorch_model-00020-of-00029.bin b/pytorch_model-00020-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..a65b29f58d92916652ad48847ba214d42aba6381 --- /dev/null +++ b/pytorch_model-00020-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91057e1652048fe60dc1fab69bcfe916e36ecd3a7e89bd8a91123d2b19b6279f +size 9328273153 diff --git a/pytorch_model-00021-of-00029.bin b/pytorch_model-00021-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..5023bffd66e67f37d7d6c496e14d64da8d6e7494 --- /dev/null +++ b/pytorch_model-00021-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeea502dc17ce92c2848044203e04e7e9dd1ff702fa9b0ef195dd71dfc609a6 +size 9328339385 diff --git a/pytorch_model-00022-of-00029.bin b/pytorch_model-00022-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c88f7868368a9a7069d387317ba95b464602986 --- /dev/null +++ b/pytorch_model-00022-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d8f989030e3a45ea15b6aa758c974076f324b32bb3286ca000e44419b6817d +size 9328339409 diff --git a/pytorch_model-00023-of-00029.bin b/pytorch_model-00023-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a8ad7dde479f4d9d60cc7add23aa92d122ac6db --- /dev/null +++ b/pytorch_model-00023-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430d8e826604636c60918c4293dfed379d5cca8a2a038f31176acb8dfc2a59bc +size 9999427567 diff --git a/pytorch_model-00024-of-00029.bin b/pytorch_model-00024-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..de21f189b27db9aef3612c4c78a26c89803bcb28 --- /dev/null +++ b/pytorch_model-00024-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05063bb4a55b16950e8a56dc09198085d592ad8d8e036e42da0b37b07364e4b 
+size 9932318513 diff --git a/pytorch_model-00025-of-00029.bin b/pytorch_model-00025-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5043552b40b9044bcf715af70142ee840bcb920 --- /dev/null +++ b/pytorch_model-00025-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf76c50b36476b9ec5ce4ac80141c502c1d6a7fdf52d8531ce7276c9dfd86b78 +size 9328273153 diff --git a/pytorch_model-00026-of-00029.bin b/pytorch_model-00026-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..92ed2077f1f5cf53431367ccef4966b3c115c3f3 --- /dev/null +++ b/pytorch_model-00026-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd25d1c9315251409cc249a0cf91e35bbb9c88aea7d9c96b82f7c6df69ce7bec +size 9328339385 diff --git a/pytorch_model-00027-of-00029.bin b/pytorch_model-00027-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..d93893f28183cbd575c1ca0ff24c81d923af2225 --- /dev/null +++ b/pytorch_model-00027-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab6f4e336228a23f2f539dc2d8a2ec64ae7f6f34bb69f40bd2359ac4d6f0a2b +size 9328339409 diff --git a/pytorch_model-00028-of-00029.bin b/pytorch_model-00028-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..a909ef4800542b8befb8b23e3abad53909f4348a --- /dev/null +++ b/pytorch_model-00028-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7c9c53126ed405aeb31dc7217f9838dd90cde5620a9938b31e022a5afc1a4c +size 9999427567 diff --git a/pytorch_model-00029-of-00029.bin b/pytorch_model-00029-of-00029.bin new file mode 100644 index 0000000000000000000000000000000000000000..10466d0fb76b2a8d90391d9104b5c1096efa654a --- /dev/null +++ b/pytorch_model-00029-of-00029.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7321dae22752666b75929aae4fbecc4ca62e4d4c75aa0f1b128f4818cb7ab1 +size 7558339545 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..aac9173f90ca8ae7a81f043ac396e55407275a58 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,810 @@ +{ + "metadata": { + "total_size": 275906678784 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00029-of-00029.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00029.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.mlp.up_proj.weight": 
"pytorch_model-00001-of-00029.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00029.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00029.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00029.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00029.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00005-of-00029.bin", + 
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00029.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00005-of-00029.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00029.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00029.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00029.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00006-of-00029.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00029.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.input_layernorm.weight": 
"pytorch_model-00007-of-00029.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00029.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00029.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00007-of-00029.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00029.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00029.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00029.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00008-of-00029.bin", + 
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00029.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00029.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00008-of-00029.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00029.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00029.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00009-of-00029.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.25.self_attn.q_proj.weight": 
"pytorch_model-00010-of-00029.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00029.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00029.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00029.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00010-of-00029.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00029.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00029.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00002-of-00029.bin", + 
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00029.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00029.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00011-of-00029.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00029.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.input_layernorm.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.mlp.down_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.mlp.up_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00029.bin", + "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.33.input_layernorm.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.33.mlp.down_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.33.mlp.up_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.33.post_attention_layernorm.weight": 
"pytorch_model-00013-of-00029.bin", + "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00029.bin", + "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00012-of-00029.bin", + "model.layers.34.input_layernorm.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.mlp.down_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.mlp.up_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00029.bin", + "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.input_layernorm.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.mlp.down_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.mlp.up_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00029.bin", + "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.36.input_layernorm.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.36.mlp.down_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.36.mlp.up_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00029.bin", + "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00013-of-00029.bin", + "model.layers.37.input_layernorm.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.mlp.down_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.mlp.up_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00014-of-00029.bin", + 
"model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00029.bin", + "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.input_layernorm.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.mlp.down_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.mlp.up_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00029.bin", + "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00014-of-00029.bin", + "model.layers.39.input_layernorm.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.mlp.down_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.mlp.up_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00029.bin", + "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00029.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.40.input_layernorm.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.mlp.down_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.mlp.gate_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.mlp.up_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.post_attention_layernorm.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.40.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00029.bin", + "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.41.input_layernorm.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.41.mlp.down_proj.weight": 
"pytorch_model-00016-of-00029.bin", + "model.layers.41.mlp.gate_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.41.mlp.up_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.41.post_attention_layernorm.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.41.self_attn.k_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.41.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00029.bin", + "model.layers.41.self_attn.v_proj.weight": "pytorch_model-00015-of-00029.bin", + "model.layers.42.input_layernorm.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.mlp.down_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.mlp.gate_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.mlp.up_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.post_attention_layernorm.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.42.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00029.bin", + "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.input_layernorm.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.mlp.down_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.mlp.gate_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.mlp.up_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.post_attention_layernorm.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.self_attn.k_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.43.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00029.bin", + "model.layers.43.self_attn.v_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.44.input_layernorm.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.44.mlp.down_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.44.mlp.gate_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.44.mlp.up_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.44.post_attention_layernorm.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.44.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00029.bin", + "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00016-of-00029.bin", + "model.layers.45.input_layernorm.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.mlp.down_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.mlp.gate_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.mlp.up_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.post_attention_layernorm.weight": "pytorch_model-00017-of-00029.bin", + 
"model.layers.45.self_attn.k_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.45.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00029.bin", + "model.layers.45.self_attn.v_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.input_layernorm.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.mlp.down_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.mlp.gate_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.mlp.up_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.post_attention_layernorm.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.46.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00029.bin", + "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.47.input_layernorm.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.47.mlp.down_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.47.mlp.gate_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.47.mlp.up_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.47.post_attention_layernorm.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.47.self_attn.k_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.47.self_attn.q_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.47.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00029.bin", + "model.layers.47.self_attn.v_proj.weight": "pytorch_model-00017-of-00029.bin", + "model.layers.48.input_layernorm.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.mlp.down_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.mlp.gate_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.mlp.up_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.post_attention_layernorm.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.48.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00029.bin", + "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.input_layernorm.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.mlp.down_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.mlp.gate_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.mlp.up_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.post_attention_layernorm.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.self_attn.k_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.49.self_attn.rotary_emb.inv_freq": 
"pytorch_model-00018-of-00029.bin", + "model.layers.49.self_attn.v_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00029.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00002-of-00029.bin", + "model.layers.50.input_layernorm.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.50.mlp.down_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.50.mlp.gate_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.50.mlp.up_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.50.post_attention_layernorm.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.50.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00029.bin", + "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00018-of-00029.bin", + "model.layers.51.input_layernorm.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.mlp.down_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.mlp.gate_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.mlp.up_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.post_attention_layernorm.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.self_attn.k_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.51.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00029.bin", + "model.layers.51.self_attn.v_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.input_layernorm.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.mlp.down_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.mlp.gate_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.mlp.up_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.post_attention_layernorm.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.52.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00029.bin", + "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00019-of-00029.bin", + "model.layers.53.input_layernorm.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.mlp.down_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.mlp.gate_proj.weight": 
"pytorch_model-00020-of-00029.bin", + "model.layers.53.mlp.up_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.post_attention_layernorm.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.self_attn.k_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.53.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00029.bin", + "model.layers.53.self_attn.v_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.input_layernorm.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.mlp.down_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.mlp.gate_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.mlp.up_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.post_attention_layernorm.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.self_attn.k_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.54.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00029.bin", + "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.55.input_layernorm.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.55.mlp.down_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.55.mlp.gate_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.55.mlp.up_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.55.post_attention_layernorm.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.55.self_attn.k_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.55.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00029.bin", + "model.layers.55.self_attn.v_proj.weight": "pytorch_model-00020-of-00029.bin", + "model.layers.56.input_layernorm.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.mlp.down_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.mlp.gate_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.mlp.up_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.post_attention_layernorm.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.56.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00029.bin", + "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.input_layernorm.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.mlp.down_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.mlp.gate_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.mlp.up_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.post_attention_layernorm.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.self_attn.k_proj.weight": "pytorch_model-00021-of-00029.bin", + 
"model.layers.57.self_attn.o_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.57.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00029.bin", + "model.layers.57.self_attn.v_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.58.input_layernorm.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.58.mlp.down_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.58.mlp.gate_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.58.mlp.up_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.58.post_attention_layernorm.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.58.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00029.bin", + "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00021-of-00029.bin", + "model.layers.59.input_layernorm.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.mlp.down_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.mlp.gate_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.mlp.up_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.post_attention_layernorm.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.self_attn.k_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.59.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00029.bin", + "model.layers.59.self_attn.v_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00029.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.60.input_layernorm.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.mlp.down_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.mlp.gate_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.mlp.up_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.post_attention_layernorm.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.self_attn.k_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.self_attn.o_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.self_attn.q_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.60.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00029.bin", + "model.layers.60.self_attn.v_proj.weight": 
"pytorch_model-00022-of-00029.bin", + "model.layers.61.input_layernorm.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.61.mlp.down_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.61.mlp.gate_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.61.mlp.up_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.61.post_attention_layernorm.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.61.self_attn.k_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.61.self_attn.o_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.61.self_attn.q_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.61.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00029.bin", + "model.layers.61.self_attn.v_proj.weight": "pytorch_model-00022-of-00029.bin", + "model.layers.62.input_layernorm.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.mlp.down_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.mlp.gate_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.mlp.up_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.post_attention_layernorm.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.self_attn.k_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.self_attn.o_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.self_attn.q_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.62.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00029.bin", + "model.layers.62.self_attn.v_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.input_layernorm.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.mlp.down_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.mlp.gate_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.mlp.up_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.post_attention_layernorm.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.self_attn.k_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.self_attn.o_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.self_attn.q_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.63.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00029.bin", + "model.layers.63.self_attn.v_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.64.input_layernorm.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.64.mlp.down_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.64.mlp.gate_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.64.mlp.up_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.64.post_attention_layernorm.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.64.self_attn.k_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.64.self_attn.o_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.64.self_attn.q_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.64.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00029.bin", + "model.layers.64.self_attn.v_proj.weight": "pytorch_model-00023-of-00029.bin", + "model.layers.65.input_layernorm.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.mlp.down_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.mlp.gate_proj.weight": "pytorch_model-00024-of-00029.bin", + 
"model.layers.65.mlp.up_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.post_attention_layernorm.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.self_attn.k_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.self_attn.o_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.self_attn.q_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.65.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00029.bin", + "model.layers.65.self_attn.v_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.input_layernorm.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.mlp.down_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.mlp.gate_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.mlp.up_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.post_attention_layernorm.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.self_attn.k_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.self_attn.o_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.self_attn.q_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.66.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00029.bin", + "model.layers.66.self_attn.v_proj.weight": "pytorch_model-00024-of-00029.bin", + "model.layers.67.input_layernorm.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.mlp.down_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.mlp.gate_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.mlp.up_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.post_attention_layernorm.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.self_attn.k_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.self_attn.o_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.self_attn.q_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.67.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00029.bin", + "model.layers.67.self_attn.v_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.input_layernorm.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.mlp.down_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.mlp.gate_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.mlp.up_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.post_attention_layernorm.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.self_attn.k_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.self_attn.o_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.self_attn.q_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.68.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00029.bin", + "model.layers.68.self_attn.v_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.69.input_layernorm.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.69.mlp.down_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.69.mlp.gate_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.69.mlp.up_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.69.post_attention_layernorm.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.69.self_attn.k_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.69.self_attn.o_proj.weight": 
"pytorch_model-00025-of-00029.bin", + "model.layers.69.self_attn.q_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.69.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00029.bin", + "model.layers.69.self_attn.v_proj.weight": "pytorch_model-00025-of-00029.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00029.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.70.input_layernorm.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.mlp.down_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.mlp.gate_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.mlp.up_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.post_attention_layernorm.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.self_attn.k_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.self_attn.o_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.self_attn.q_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.70.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00029.bin", + "model.layers.70.self_attn.v_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.input_layernorm.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.mlp.down_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.mlp.gate_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.mlp.up_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.post_attention_layernorm.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.self_attn.k_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.self_attn.o_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.self_attn.q_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.71.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00029.bin", + "model.layers.71.self_attn.v_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.72.input_layernorm.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.72.mlp.down_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.72.mlp.gate_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.72.mlp.up_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.72.post_attention_layernorm.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.72.self_attn.k_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.72.self_attn.o_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.72.self_attn.q_proj.weight": "pytorch_model-00026-of-00029.bin", + "model.layers.72.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00029.bin", + "model.layers.72.self_attn.v_proj.weight": "pytorch_model-00026-of-00029.bin", + 
"model.layers.73.input_layernorm.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.mlp.down_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.mlp.gate_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.mlp.up_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.post_attention_layernorm.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.self_attn.k_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.self_attn.o_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.self_attn.q_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.73.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00029.bin", + "model.layers.73.self_attn.v_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.input_layernorm.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.mlp.down_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.mlp.gate_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.mlp.up_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.post_attention_layernorm.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.self_attn.k_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.self_attn.o_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.self_attn.q_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.74.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00029.bin", + "model.layers.74.self_attn.v_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.75.input_layernorm.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.75.mlp.down_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.75.mlp.gate_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.75.mlp.up_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.75.post_attention_layernorm.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.75.self_attn.k_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.75.self_attn.o_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.75.self_attn.q_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.75.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00029.bin", + "model.layers.75.self_attn.v_proj.weight": "pytorch_model-00027-of-00029.bin", + "model.layers.76.input_layernorm.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.mlp.down_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.mlp.gate_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.mlp.up_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.post_attention_layernorm.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.self_attn.k_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.self_attn.o_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.self_attn.q_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.76.self_attn.rotary_emb.inv_freq": "pytorch_model-00028-of-00029.bin", + "model.layers.76.self_attn.v_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.input_layernorm.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.mlp.down_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.mlp.gate_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.mlp.up_proj.weight": 
"pytorch_model-00028-of-00029.bin", + "model.layers.77.post_attention_layernorm.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.self_attn.k_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.self_attn.o_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.self_attn.q_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.77.self_attn.rotary_emb.inv_freq": "pytorch_model-00028-of-00029.bin", + "model.layers.77.self_attn.v_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.78.input_layernorm.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.78.mlp.down_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.78.mlp.gate_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.78.mlp.up_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.78.post_attention_layernorm.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.78.self_attn.k_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.78.self_attn.o_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.78.self_attn.q_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.78.self_attn.rotary_emb.inv_freq": "pytorch_model-00029-of-00029.bin", + "model.layers.78.self_attn.v_proj.weight": "pytorch_model-00028-of-00029.bin", + "model.layers.79.input_layernorm.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.mlp.down_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.mlp.gate_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.mlp.up_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.post_attention_layernorm.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.self_attn.k_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.self_attn.o_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.self_attn.q_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.79.self_attn.rotary_emb.inv_freq": "pytorch_model-00029-of-00029.bin", + "model.layers.79.self_attn.v_proj.weight": "pytorch_model-00029-of-00029.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00029.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00003-of-00029.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00004-of-00029.bin", + 
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00029.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00004-of-00029.bin", + "model.norm.weight": "pytorch_model-00029-of-00029.bin" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..21edfc81710e2dc1e41bd42b3a51bfff845568cf --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "[PAD]", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..508754bfb1263631e39be7a2cd3577f6b5657c16 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,35 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 4096, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +}