diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd0148c858627019cc07908aadf7d17c3f911c90 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "./alpaca_dragon", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 24576, + "max_position_embeddings": 32768, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 64, + "pad_token_id": 151643, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "seq_length": 32768, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb8a1e45b2d281c0170b6791a9ca30769b8bdc95 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 151643, + "pad_token_id": 151643, + "transformers_version": "4.37.2" +} diff --git a/model-00001-of-00063.safetensors b/model-00001-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..844a0d27d17adfe2337333db7cd55498255ab0bd --- /dev/null +++ b/model-00001-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c877a3c02521f0fbff6812c0163a07aa9e3f24faaf2e7a0f4a3c6a78c88670 +size 4982833288 diff --git a/model-00002-of-00063.safetensors b/model-00002-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afbe84ac9b70db3261b24d8f248e0416d02baf8e --- /dev/null +++ b/model-00002-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a98c92f4b6c9b92499f5a70f4584d78fd06919349c551b15c3ab0f13ed9c68f +size 4563732824 diff --git a/model-00003-of-00063.safetensors b/model-00003-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47995aac028d85aa0008aaede34859155f00a5b8 --- /dev/null +++ b/model-00003-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ef9240eafbfe9122a11f93867be86d7a0f530cab7a4efb725fc60dae382b22 +size 4295165512 diff --git a/model-00004-of-00063.safetensors b/model-00004-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0539c70b0ad955e6ad278dadffe616cb6f1aeed1 --- /dev/null +++ b/model-00004-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1092a4b6657ee63f3601103086485b0451017a9015f97bdccb4c1bd8b32b41ba +size 4295165504 diff --git a/model-00005-of-00063.safetensors b/model-00005-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d4c40a2e9d652353fb76f0e982c98585c54dc73 --- /dev/null +++ b/model-00005-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df17c368c979497868259e9579e0bb458f8a2eac72698cb867fa19555a72605 +size 4832168168 diff --git a/model-00006-of-00063.safetensors b/model-00006-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51dbcec75f34d0c6f48df3dd070c43c69f276e63 --- /dev/null +++ b/model-00006-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ceca019831eebb6f764caa7536e28b38bef6b9f7fc5fdf1ae0c532ddf56648 +size 4832102408 diff --git a/model-00007-of-00063.safetensors b/model-00007-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5adb7b0e6abbd9425b148578975d9d6236d7a092 --- /dev/null +++ b/model-00007-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb7b82c2eb2b6a2419871ad961c1238be538b4558c39da97b7b452b6e898dc5 +size 4295165504 diff --git a/model-00008-of-00063.safetensors b/model-00008-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9fa1e52e74020f1d5266e04ef1ba1db5312340c --- /dev/null +++ b/model-00008-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0339c0a4e0a479449d2f5567592cc831293ee4473292374a9876dfc03dbe2ffb +size 4832168168 diff --git a/model-00009-of-00063.safetensors b/model-00009-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f467af3b64dd00c37b8bdbe69b0f8cb58d337d02 --- /dev/null +++ b/model-00009-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e19b35f13af81b46f3fe7af748108ef946a7ff870fd72831a9de014cfd3af9f +size 4832102424 diff --git a/model-00010-of-00063.safetensors b/model-00010-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a4c7d76195961f2c04ac4db1428fc5db74868a6 --- /dev/null +++ b/model-00010-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4eb63611d3961db7c7d77c50c94ad724eaeb0ad2a4f2f2cb764016859189215 +size 4295165520 diff --git a/model-00011-of-00063.safetensors b/model-00011-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb0133be4c4268b3d42ab63bf1585bb294d2e591 --- /dev/null +++ b/model-00011-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d52a06da6f3fef5a7b9a5481ab4488e6ecc03b8cc148d9cacff1597b505d2707 +size 4832168192 diff --git a/model-00012-of-00063.safetensors b/model-00012-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08196d64a0f1675b7895cdb82de700c89300d734 --- /dev/null +++ b/model-00012-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a4ed319afc5686140a5864d5ac81b2ab5f5a002776fd847c0086f2dc88915f +size 4832102424 diff --git a/model-00013-of-00063.safetensors b/model-00013-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9895d739365568cd51819711efec79fa4f0e3af0 --- /dev/null +++ b/model-00013-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf70fb5f207d5c6763bbfbb250b522103acb2761fa461c360dd1e194284cbdf +size 4295165520 diff --git a/model-00014-of-00063.safetensors b/model-00014-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0896809f7ab540bff76cd9d82e5d6dadf462d073 --- /dev/null +++ b/model-00014-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97445490724b2bf6cb6c91d3690d8c7fd497c68a7bc0b3e5f72c9b60abcd2fce +size 4832168192 diff --git a/model-00015-of-00063.safetensors b/model-00015-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a8cc8d315d2dd16670c8c2e47caa1d3e367ef5d --- /dev/null +++ b/model-00015-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a7cdbf94fdc3c9b51c0a9d98c95f73ab86890c5d9fe197634b44d69c156042 +size 4832102424 diff --git a/model-00016-of-00063.safetensors b/model-00016-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aa5b259de51da45c073db368378b915da1020a2 --- /dev/null +++ b/model-00016-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc07681b1a1ca88cd62d03384d85a1aad3e2ef86f6a3f49643bad6931fc5b531 +size 4295165520 diff --git a/model-00017-of-00063.safetensors b/model-00017-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee2b442e3cb73c96b2b5d3c4313456b5d7c2f092 --- /dev/null +++ b/model-00017-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cf63b5cd0449d2d769184983591b0ede6ec17f161c69420939f00ab0454fb3 +size 4832168192 diff --git a/model-00018-of-00063.safetensors b/model-00018-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..644ea534c681c7c5fc33f01d5ab33ada43342381 --- /dev/null +++ b/model-00018-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f478714d92e797071f350f5f826b17faecce3e01ca46b06526513781f0f4d9 +size 4832102424 diff --git a/model-00019-of-00063.safetensors b/model-00019-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b39ae7c43ab3a6bcf6c0e67536fb113acecf2915 --- /dev/null +++ b/model-00019-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ef74836db85e5a4f566e3e037508d859bf9d05c86c787b9569d5186abb3538 +size 4295165520 diff --git a/model-00020-of-00063.safetensors b/model-00020-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84682fda0858a9152d802217b2e2ad4bc378154a --- /dev/null +++ b/model-00020-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc8574d35ae909424828fbd08364d86b1fdd0bb464c1a50bd23df4f2a15751f +size 4832168192 diff --git a/model-00021-of-00063.safetensors b/model-00021-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed945377b10b66879ca9fc773305bba54390b815 --- /dev/null +++ b/model-00021-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b1f6c33a14624e17f9a005637c6932fe30b21f06c960b2e8d465fb716058fa +size 4832102424 diff --git a/model-00022-of-00063.safetensors b/model-00022-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4718aafcb937143557045f266a84196951c93b18 --- /dev/null +++ b/model-00022-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048048e8183933c58dd5e8a99b0e3a914f77254a7bbad5d96f229d53fe5e14ce +size 4295165520 diff --git a/model-00023-of-00063.safetensors b/model-00023-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d61a29fcfedb39db001ae1f3c3d43c2bdcffd422 --- /dev/null +++ b/model-00023-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb13d6b448b99d5ae4fcbe3783e4d4e6e1cac50995729651ba22544abae26b6 +size 4832168192 diff --git a/model-00024-of-00063.safetensors b/model-00024-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96fb8d5019d37f680052a3140216cbc7c5dfe2ee --- /dev/null +++ b/model-00024-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d666e32bc6ea5920d5ba3293d182d7b7d245bf6132244c21559f78974d62c180 +size 4832102424 diff --git a/model-00025-of-00063.safetensors b/model-00025-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2eea58d4ad33ca0a4d1a9c250f0913ae82ec137 --- /dev/null +++ b/model-00025-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35acb58a2404477d462292a2400b4eb988b1f456bb2c63eb499e785d296c7cd1 +size 4295165520 diff --git a/model-00026-of-00063.safetensors b/model-00026-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6cd90699d4497f075c7db946bdc751679895f461 --- /dev/null +++ b/model-00026-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e333fb348f29d300d9fc8133c0e2db3df308321365d7aad70c1da9c44ea5f6a +size 4832168192 diff --git a/model-00027-of-00063.safetensors b/model-00027-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07a3e4e6aacc89757ac78611091acb0553362abc --- /dev/null +++ b/model-00027-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c950d79a73fa37c6f9d4e4381c82bdf37c8c2cafdb867560a9687e94b0e542c5 +size 4832102424 diff --git a/model-00028-of-00063.safetensors b/model-00028-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..906792aa094043f7de7321625420579e6bbb5c98 --- /dev/null +++ b/model-00028-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b26052c0d7f8bb8ec2736a9442deb6502d136fa526c5ad53ee6182c902993e9 +size 4295165520 diff --git a/model-00029-of-00063.safetensors b/model-00029-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96378f40eb68b33af059223504abe4e12348f86f --- /dev/null +++ b/model-00029-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db2e571dee48466331ec4ac9d557c43edd88e5a9fd738d398bfa3d067202554 +size 4832168192 diff --git a/model-00030-of-00063.safetensors b/model-00030-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fd65f596afd78f5b7a707788b92a79c498df7e6 --- /dev/null +++ b/model-00030-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59231f35e5106fde9712aed2ddb4d68ea8f070e0f626cfe7754582cdab236101 +size 4832102424 diff --git a/model-00031-of-00063.safetensors b/model-00031-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b36c81abb8db862a332f5bfbb228657019ab1a2 --- /dev/null +++ b/model-00031-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba27f21b1634dd8674efc645731129a33d0d2acea64d89ffc6968384742d895b +size 4295165520 diff --git a/model-00032-of-00063.safetensors b/model-00032-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..775547d8d6b694590e3421cb7a7ec90640c7c5f4 --- /dev/null +++ b/model-00032-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f446fd5792457f6cf64e8bc1db21695398381010a55f875d2c4b1addd05720 +size 4832168192 diff --git a/model-00033-of-00063.safetensors b/model-00033-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f19f177ef643a47f1a76c75eb05709b0a8da1bb6 --- /dev/null +++ b/model-00033-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f39b8b1c2d27a50f1ed36406cb6f72187fe3c8357fbf326281604eed2db3b3d0 +size 4832102424 diff --git a/model-00034-of-00063.safetensors b/model-00034-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c79e775eafe92bc691dcaf3231c57d80b3d906a --- /dev/null +++ b/model-00034-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b1686f3badefd24ba6f6f3d4ff58d5e38cb77f0f977ad422681bc027706a27f +size 4295165520 diff --git a/model-00035-of-00063.safetensors b/model-00035-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f4b3f501493b0f4ef3acc4dab3ef496ae049f4f --- /dev/null +++ b/model-00035-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25358bfd1b371ddbf53b85df3bbd2bc2302a7fa07a24235188ccdef04cda9ad +size 4832168192 diff --git a/model-00036-of-00063.safetensors b/model-00036-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71db3669e11c0044ac5b73cdb9dff6b918d5a561 --- /dev/null +++ b/model-00036-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17f404f15b70a54354bbcd5a45bdfa5d4fc3a0a2abc97022b90441003f41edba +size 4832102424 diff --git a/model-00037-of-00063.safetensors b/model-00037-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f8501f05e975a6ba69ddbb9379e9cdb1c3d80ee --- /dev/null +++ b/model-00037-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e15cf02d9a5cb926ad3f320ee1acf46251e6e02c6473a1aebcf870e992976cd +size 4295165520 diff --git a/model-00038-of-00063.safetensors b/model-00038-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7397c2ea1b6d32c8c66054b3f6b67bc0343a3aa2 --- /dev/null +++ b/model-00038-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07266edc388c35c263a07d447dfb4c225cf805a3bc24b48c53c830633b0556e6 +size 4832168192 diff --git a/model-00039-of-00063.safetensors b/model-00039-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78cf96946aacaa3d9fd944308d70c78aa578a508 --- /dev/null +++ b/model-00039-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d651be88789177e0a8ce70e82ee6278a7fec5ee94bcd16af99308cd727b634b +size 4832102424 diff --git a/model-00040-of-00063.safetensors b/model-00040-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9910cc9c15c0b3519896cae3d749d631b9ef2733 --- /dev/null +++ b/model-00040-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd57ea872b63a09afd309cfb3442c832e99b449312a10904c12748eea47f4ae +size 4295165520 diff --git a/model-00041-of-00063.safetensors b/model-00041-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4a71e80d23da6e508954cf082e780a54b28acd9 --- /dev/null +++ b/model-00041-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60beed15b42fba44fdb075983e1f6a0264cb67b56c5877908e79f41a93838d7c +size 4832168192 diff --git a/model-00042-of-00063.safetensors b/model-00042-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76aa211f5ef3b86ec07b2dd2ad059e27b07d1767 --- /dev/null +++ b/model-00042-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c408e08466f8be2dcf7264fb337ceaeff351e8c7a39e967428e801a6ed5eb585 +size 4832102424 diff --git a/model-00043-of-00063.safetensors b/model-00043-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f61d5a1e56d1afc593e6848350f4724de94ee76 --- /dev/null +++ b/model-00043-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e3b218b504bdbdf2b5bb3b7017e7bf597b55dff983736ff8c48da9a84acec4 +size 4295165520 diff --git a/model-00044-of-00063.safetensors b/model-00044-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b43c36b6804b64fc9d51a69fc7b9f54ca5b18990 --- /dev/null +++ b/model-00044-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06faa7f70300c726c84aaf8bee2850e4545211cf973b71ac56d6cbb04e63ff4 +size 4832168192 diff --git a/model-00045-of-00063.safetensors b/model-00045-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53c03c3ec2bfd7b2def600f4865caf0b122567d1 --- /dev/null +++ b/model-00045-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df86e7efbedb5a47094453d26b0345c158dc18ac87a0ab43769e38e17603add +size 4832102424 diff --git a/model-00046-of-00063.safetensors b/model-00046-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b55539e2c3321e47c4ad3bd9faa713b6d21c3ecc --- /dev/null +++ b/model-00046-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7216c735fa661276eff0046564dde6934fb48628a18a44d6e0685a726215ebd2 +size 4295165520 diff --git a/model-00047-of-00063.safetensors b/model-00047-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55a4fccf5f6be4ac9c34bd33d8e491d17fe2ec6d --- /dev/null +++ b/model-00047-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d699b7c928a7bb82416e6f4935bea583862a8d8dabd80d73624e874f19bf3804 +size 4832168192 diff --git a/model-00048-of-00063.safetensors b/model-00048-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..305461f7e4cac7a239cf863f8a836d795ddc2df6 --- /dev/null +++ b/model-00048-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb80306a996c78c10a214df178fab9f873d8d5343f9f624bf43b05bb696420a +size 4832102424 diff --git a/model-00049-of-00063.safetensors b/model-00049-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78cebb7c430fabe4324949ec818a2af8b06e52a8 --- /dev/null +++ b/model-00049-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfddd67de439a9783bca6833cde03422d8805126da2a069a68ef2ef36563aa13 +size 4295165520 diff --git a/model-00050-of-00063.safetensors b/model-00050-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0c7271a494c80c7194275fab107128f0d080d7e --- /dev/null +++ b/model-00050-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa853f71ad28de934db2434afbb578ba0724b6a55cb0496e3dfd0ba459c45da +size 4832168192 diff --git a/model-00051-of-00063.safetensors b/model-00051-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9356e848564b7c1502e986319ef3094a77b9cbc1 --- /dev/null +++ b/model-00051-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34701c474525cef54a80ef4f78ec743bb9debf98207fb27ddc88e28b01cc42d7 +size 4832102424 diff --git a/model-00052-of-00063.safetensors b/model-00052-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e51a9219d8c95834d1ab376fa7b3b81b02342dd --- /dev/null +++ b/model-00052-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d870f717b6fcf199117faf8e971885eefda38b7bd4624acfc8adc66b02ca24 +size 4295165520 diff --git a/model-00053-of-00063.safetensors b/model-00053-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d904bb3b7f065599500b7d4cf21c44cb50649d29 --- /dev/null +++ b/model-00053-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31f8ca2f105235acad8a060ff0ca29c7d6dcd53bd0fa1e586adfe7ff0977f41 +size 4832168192 diff --git a/model-00054-of-00063.safetensors b/model-00054-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d31c01c5f24717c0c53849bc587a230fb86a7e01 --- /dev/null +++ b/model-00054-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457d965686ee339aa20a78cabe96999435fb16960e477c9edfdcd2643731bd5b +size 4832102424 diff --git a/model-00055-of-00063.safetensors b/model-00055-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dfb2147aa1ddd19869d9fe33f8bd55d480ae1ec --- /dev/null +++ b/model-00055-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad92eff7002bd91b550e2d9b59d590e9c5fc4443391e7cdd276b12cb5a369b1 +size 4295165520 diff --git a/model-00056-of-00063.safetensors b/model-00056-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48794da41d0039ee6cb84a0d2b27c060fc8251ab --- /dev/null +++ b/model-00056-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1581a64e5c91796f7c6c6d2178c3a130ccf8c8fc80b467f0b27283373b9e2547 +size 4832168192 diff --git a/model-00057-of-00063.safetensors b/model-00057-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f9aaa049c4be2671301a7ab7ebbd7c33303c779 --- /dev/null +++ b/model-00057-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa7b6fe4010fdd3f467d29ee7edea0327b445478c8893fe47ab0651728396e0 +size 4832102424 diff --git a/model-00058-of-00063.safetensors b/model-00058-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5cf23aba93258579c8d05075c1ee294965213e7d --- /dev/null +++ b/model-00058-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6220304bf33527a9d52662cadf9402f85e91871d5e49d9150b466c3c9db6af4c +size 4295165520 diff --git a/model-00059-of-00063.safetensors b/model-00059-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1941426b362150a7b5119fda512974a38bbacef4 --- /dev/null +++ b/model-00059-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2183678dbbb453c41560655e5801ce9b05bcd99ffd8018e32269441510d43ae +size 4832168192 diff --git a/model-00060-of-00063.safetensors b/model-00060-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9936e2faf3e604887b7d8401f9df0ead9ee99275 --- /dev/null +++ b/model-00060-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0379c0dcc805b4dbbbc6d42a7ae69bf74d954e30a5831b4c2aea54b156f524b +size 4832102424 diff --git a/model-00061-of-00063.safetensors b/model-00061-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..160e3216e0ecb8692ccd9ea027ede0ec3923fb30 --- /dev/null +++ b/model-00061-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bc2571bb576c18066064b6ed9f742dd51e54f2de319cf9356dc74b96c0bd01 +size 4295165520 diff --git a/model-00062-of-00063.safetensors b/model-00062-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd98d517247b1845637e824b1062199b55fef795 --- /dev/null +++ b/model-00062-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053b0d81c233470855d973e541dd6af80b5f0ccbe9983be119297ca5e7522308 +size 805405128 diff --git a/model-00063-of-00063.safetensors b/model-00063-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa3db9760ed64c9e8ac77714c352e016ba3556e5 --- /dev/null +++ b/model-00063-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccaf6fe64f579cfa908b4d1bed2e6f4fca302bcad53f947c2f837f4957024835 +size 4982833280 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..e7a3ce31d58e550153e781528cab47045570c8aa --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1050 @@ +{ + "metadata": { + "total_size": 289154301952 + }, + "weight_map": { + "lm_head.weight": "model-00063-of-00063.safetensors", + "model.embed_tokens.weight": "model-00001-of-00063.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.o_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00063.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.o_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.10.input_layernorm.weight": "model-00010-of-00063.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00010-of-00063.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.o_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.11.input_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.o_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.12.input_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.o_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.input_layernorm.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.o_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.input_layernorm.weight": "model-00013-of-00063.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00013-of-00063.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.o_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.15.input_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.o_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.16.input_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.o_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.input_layernorm.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.o_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.input_layernorm.weight": "model-00016-of-00063.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00016-of-00063.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.o_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.19.input_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.o_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00063.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00063.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.o_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.20.input_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.o_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.input_layernorm.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.o_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.input_layernorm.weight": "model-00019-of-00063.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00019-of-00063.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.o_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.23.input_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.o_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.24.input_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.o_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.input_layernorm.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.o_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.input_layernorm.weight": "model-00022-of-00063.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00022-of-00063.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.o_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.27.input_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.o_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.28.input_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.o_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.input_layernorm.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.o_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.o_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.30.input_layernorm.weight": "model-00025-of-00063.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00025-of-00063.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.o_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.31.input_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.o_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.32.input_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.o_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.input_layernorm.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.o_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.input_layernorm.weight": "model-00028-of-00063.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00028-of-00063.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.o_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.35.input_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.o_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.36.input_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.k_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.o_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.q_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.v_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.input_layernorm.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.k_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.o_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.q_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.v_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.input_layernorm.weight": "model-00031-of-00063.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00031-of-00063.safetensors", + "model.layers.38.self_attn.k_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.o_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.q_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.v_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.39.input_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.39.self_attn.k_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.o_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.q_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.v_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.o_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.40.input_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.k_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.o_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.q_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.v_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.input_layernorm.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.k_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.o_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.q_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.v_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.input_layernorm.weight": "model-00034-of-00063.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00034-of-00063.safetensors", + "model.layers.42.self_attn.k_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.o_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.q_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.v_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.43.input_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.43.self_attn.k_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.o_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.q_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.v_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.44.input_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.k_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.o_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.q_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.v_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.input_layernorm.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.k_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.o_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.q_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.v_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.input_layernorm.weight": "model-00037-of-00063.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00037-of-00063.safetensors", + "model.layers.46.self_attn.k_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.o_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.q_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.v_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.47.input_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.47.self_attn.k_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.o_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.q_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.v_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.48.input_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.k_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.o_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.q_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.v_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.input_layernorm.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.k_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.o_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.q_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.v_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.o_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.50.input_layernorm.weight": "model-00040-of-00063.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00040-of-00063.safetensors", + "model.layers.50.self_attn.k_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.o_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.q_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.v_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.51.input_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.51.self_attn.k_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.o_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.q_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.v_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.52.input_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.k_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.o_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.q_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.v_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.input_layernorm.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.k_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.o_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.q_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.v_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.input_layernorm.weight": "model-00043-of-00063.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00043-of-00063.safetensors", + "model.layers.54.self_attn.k_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.o_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.q_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.v_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.55.input_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.55.self_attn.k_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.o_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.q_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.v_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.56.input_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.k_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.o_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.q_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.v_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.input_layernorm.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.k_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.o_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.q_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.v_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.input_layernorm.weight": "model-00046-of-00063.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00046-of-00063.safetensors", + "model.layers.58.self_attn.k_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.o_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.q_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.v_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.59.input_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.59.self_attn.k_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.o_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.q_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.v_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00063.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00063.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.o_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.60.input_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.k_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.o_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.q_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.v_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.input_layernorm.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.k_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.o_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.q_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.v_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.input_layernorm.weight": "model-00049-of-00063.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00049-of-00063.safetensors", + "model.layers.62.self_attn.k_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.o_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.q_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.v_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.63.input_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.63.self_attn.k_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.o_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.q_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.v_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.64.input_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.k_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.o_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.q_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.v_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.input_layernorm.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.k_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.o_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.q_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.v_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.input_layernorm.weight": "model-00052-of-00063.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00052-of-00063.safetensors", + "model.layers.66.self_attn.k_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.o_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.q_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.v_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.67.input_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.67.self_attn.k_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.o_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.q_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.v_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.68.input_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.k_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.o_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.q_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.v_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.input_layernorm.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.k_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.o_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.q_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.v_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.7.input_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.o_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.70.input_layernorm.weight": "model-00055-of-00063.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00055-of-00063.safetensors", + "model.layers.70.self_attn.k_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.o_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.q_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.v_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.71.input_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.71.self_attn.k_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.o_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.q_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.v_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.72.input_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.k_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.o_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.q_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.v_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.input_layernorm.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.k_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.o_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.q_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.v_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.input_layernorm.weight": "model-00058-of-00063.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00058-of-00063.safetensors", + "model.layers.74.self_attn.k_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.o_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.q_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.v_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.75.input_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.75.self_attn.k_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.o_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.q_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.v_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.76.input_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.k_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.o_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.q_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.v_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.input_layernorm.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.k_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.o_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.q_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.v_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.input_layernorm.weight": "model-00061-of-00063.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00061-of-00063.safetensors", + "model.layers.78.self_attn.k_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.o_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.q_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.v_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.79.input_layernorm.weight": "model-00062-of-00063.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00062-of-00063.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00062-of-00063.safetensors", + "model.layers.79.self_attn.k_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.o_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.q_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.v_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.8.input_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.o_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.input_layernorm.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.o_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00009-of-00063.safetensors", + "model.norm.weight": "model-00062-of-00063.safetensors" + } +}